In [93]:
import glob
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import metrics
import tensorflow as tf
from tensorflow.python.data import Dataset

In [94]:
featureVectorSize = 140
tf.logging.set_verbosity(tf.logging.ERROR)
pd.options.display.max_rows = 10
pd.options.display.float_format = '{:.1f}'.format

In [95]:
def computeFeatureColumns():
    """Construct the TensorFlow Feature Columns.
    Returns:
      A set of feature columns
    """
    return set([tf.feature_column.numeric_column('audioFeatures', shape=featureVectorSize)])

In [96]:
def computeTrainInputFn(features, labels, batch_size, num_epochs=None, shuffle=True):
    def inputFunction(num_epochs=num_epochs, shuffle=True):
        random_array = np.random.permutation(features.index)
        raw_features = {"audioFeatures": features.reindex(random_array)}
        raw_labels = np.array(labels[random_array])

        datatens = Dataset.from_tensor_slices((raw_features, raw_labels))
        datatens = datatens.batch(batch_size).repeat(num_epochs)

        if shuffle:
            datatens = datatens.shuffle(10000)
        # Returns the next batch of data.
        feature_batch, label_batch = datatens.make_one_shot_iterator().get_next()
        return feature_batch, label_batch

    return inputFunction

In [98]:
def createPredictInputFunction(features, labels, batch_size):

    def inputFunction():
        raw_features = {"audioFeatures": features.values}
        raw_labels = np.array(labels)

        datatens = Dataset.from_tensor_slices((raw_features, raw_labels))
        datatens = datatens.batch(batch_size)

        # Returns the next batch of data.
        feature_batch, label_batch = datatens.make_one_shot_iterator().get_next()
        return feature_batch, label_batch

    return inputFunction

In [99]:
def trainNNClassifier(
        learning_rate,
        regularization_strength,
        steps,
        batch_size,
        hidden_units,
        training_examples,
        training_labels,
        validation_examples,
        validation_labels,
        model_Name='no_Name'):
    periods = 10
    steps_per_period = steps / periods

    # Create the input functions.
    predict_traininginputFunction = createPredictInputFunction(
        training_examples, training_labels, batch_size)
    predict_validationinputFunction = createPredictInputFunction(
        validation_examples, validation_labels, batch_size)
    traininginputFunction = computeTrainInputFn(
        training_examples, training_labels, batch_size)

    # Create feature columns.
    feature_columns = computeFeatureColumns()

    # Create a DNNClassifier object.
    my_optimizer = tf.train.ProximalAdagradOptimizer(
        learning_rate=learning_rate,
        l2_regularization_strength=regularization_strength  # can be swapped for l1 regularization
    )

    classifier = tf.estimator.DNNClassifier(
        feature_columns=feature_columns,
        n_classes=10,
        hidden_units=hidden_units,
        optimizer=my_optimizer,
        config=tf.contrib.learn.RunConfig(keep_checkpoint_max=1)
    )

    # Train the model, but do so inside a loop so that we can periodically assess loss metrics.
    print("Training model --->> ")
    print("LogLoss error computed on the validation data is :")
    training_errors = []
    validation_errors = []
    
    for period in range(0, periods):
        # Train the model, starting from the prior state.
        classifier.train(
            input_fn=traininginputFunction,
            steps=steps_per_period
        )

        # Use the current model to make predictions on both, the training and validation set.
        training_predictions = list(classifier.predict(input_fn=predict_traininginputFunction))
        training_pred_class_id = np.array([item['class_ids'][0] for item in training_predictions])
        training_pred_one_hot = tf.keras.utils.to_categorical(training_pred_class_id, 10)

        validation_predictions = list(classifier.predict(input_fn=predict_validationinputFunction))
        validation_pred_class_id = np.array([item['class_ids'][0] for item in validation_predictions])
        validation_pred_one_hot = tf.keras.utils.to_categorical(validation_pred_class_id, 10)

        # Use predictions to compute training and validation errors.
        training_log_loss = metrics.log_loss(training_labels, training_pred_one_hot)
        validation_log_loss = metrics.log_loss(validation_labels, validation_pred_one_hot)

        # Print validation error of current model.
        print("  period %02d : %0.2f" % (period, validation_log_loss))

        # Store loss metrics so we can plot them later.
        training_errors.append(training_log_loss)
        validation_errors.append(validation_log_loss)

    print("Model training IS finished.")
    # Remove event files to save disk space.
    _ = map(os.remove, glob.glob(os.path.join(classifier.model_dir, 'events.out.tfevents*')))

    # Compute predictions of final model.
    final_predictions = classifier.predict(input_fn=predict_validationinputFunction)
    final_predictions = np.array([item['class_ids'][0] for item in final_predictions])

    # Evaluate predictions of final model.
    accuracy = metrics.accuracy_score(validation_labels, final_predictions)
    print("Final accuracy (on validation data): %0.2f" % accuracy)

    # Output a graph of loss metrics over periods.
    plt.ylabel("LogLoss")
    plt.xlabel("Periods")
    plt.title("LogLoss vs. Periods")
    plt.plot(training_errors, label="training")
    plt.plot(validation_errors, label="validation")
    plt.legend()
    # plt.show()  # blocks execution
    plt.savefig('Results\\' + model_Name + '_loss_curve.png', bbox_inches='tight')
    plt.gcf().clear()

    # Create a confusion matrix.
    confusionMatrixPlot = metrics.confusion_matrix(validation_labels, final_predictions)

    # Normalize the confusion matrix by the number of samples in each class (rows).
    confusionMatrixPlot_normalized = confusionMatrixPlot.astype("float") / confusionMatrixPlot.sum(axis=1)[:, np.newaxis]
    ax = sns.heatmap(confusionMatrixPlot_normalized, cmap="bone_r")
    ax.set_aspect(1)
    plt.title("Confusion matrix")
    plt.ylabel("True label")
    plt.xlabel("Predicted label")
    plt.savefig('Results\\' + model_Name + '_confusion_matrix.png', bbox_inches='tight')
    plt.gcf().clear()

    return classifier

In [100]:
def runSession():
    def runSession():
    # unpickle and prepare training data
        # unpickle and prepare training data
    training_examples_data = pd.read_pickle('Extracted_Features-notFold10_features.pkl')
    mean = np.mean(training_examples_data, axis=0)  
    std = np.std(training_examples_data, axis=0, ddof=1) 

    training_examples_data -= mean 
    training_examples_data /= std 
    training_examples = training_examples_data
    
    
    training_labels = pd.read_pickle('Extracted_Features-notFold10_labels.pkl')

    # unpickle and prepare validation data
    validation_examples_data = pd.read_pickle('Extracted_Features-fold10_features.pkl')
    mean = np.mean(validation_examples_data, axis=0)  
    std = np.std(validation_examples_data, axis=0, ddof=1) 

    validation_examples_data -= mean 
    validation_examples_data /= std 
    validation_examples = validation_examples_data
    
    
    validation_labels = pd.read_pickle('Extracted_Features-fold10_labels.pkl')
    
    for learning_rate in [0.001, 0.003, 0.01, 0.03, 0.1, 0.3]:
        for regularization_strength in [0.0, 0.003, 0.03, 0.3]:
            print("##########################################################################")
            print("Learning rate:", learning_rate)
            print("Regularization:", regularization_strength)
            train_nn_classification_model(
                learning_rate=0.003,
                regularization_strength=0.2,
                steps=10000,
                batch_size=32,
                hidden_units=[120],
                training_examples=training_examples,
                training_labels=training_labels,
                validation_examples=validation_examples,
                validation_labels=validation_labels)


In [101]:
runSession()

##########################################################################
Learning rate: 0.001
Regularization: 0.0
Training model --->> 
LogLoss error computed on the validation data is :
  period 00 : 12.09
  period 01 : 11.18
  period 02 : 11.14
  period 03 : 11.06
  period 04 : 10.89
  period 05 : 10.69
  period 06 : 10.40
  period 07 : 10.36
  period 08 : 10.27
  period 09 : 10.27
Model training IS finished.
Final accuracy (on validation data): 0.70
##########################################################################
Learning rate: 0.001
Regularization: 0.003
Training model --->> 
LogLoss error computed on the validation data is :
  period 00 : 11.80
  period 01 : 10.77
  period 02 : 10.77
  period 03 : 10.56
  period 04 : 10.77
  period 05 : 10.69
  period 06 : 10.61
  period 07 : 10.65
  period 08 : 10.85
  period 09 : 10.77
Model training IS finished.
Final accuracy (on validation data): 0.69
##########################################################################
Learn

##########################################################################
Learning rate: 0.1
Regularization: 0.03
Training model --->> 
LogLoss error computed on the validation data is :
  period 00 : 11.43
  period 01 : 11.02
  period 02 : 10.65
  period 03 : 10.56
  period 04 : 10.40
  period 05 : 10.32
  period 06 : 10.19
  period 07 : 10.36
  period 08 : 10.19
  period 09 : 10.44
Model training IS finished.
Final accuracy (on validation data): 0.70
##########################################################################
Learning rate: 0.1
Regularization: 0.3
Training model --->> 
LogLoss error computed on the validation data is :
  period 00 : 11.14
  period 01 : 10.52
  period 02 : 10.61
  period 03 : 10.48
  period 04 : 10.23
  period 05 : 10.11
  period 06 : 10.07
  period 07 : 9.86
  period 08 : 10.07
  period 09 : 9.94
Model training IS finished.
Final accuracy (on validation data): 0.71
##########################################################################
Learning rat

<Figure size 432x288 with 0 Axes>