# License

In [1]:
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

# Todos

* Separate prediction and model loading out of the training code
* Confirm audio samples are being loaded in a way that is extensible
* Train with all samples

In [8]:
def installDeps():
    """Install the packages and the ffmpeg binary this notebook relies on.

    Every statement is an IPython shell escape (``!``), so the function only
    works when run by an IPython/Jupyter kernel. It is defined here but is not
    called anywhere in the visible notebook.
    """
    # numpy is imported by this notebook; scipy, resampy and six are
    # presumably needed by the vggish_* helper modules -- TODO confirm.
    !pip install numpy scipy
    !pip install resampy tensorflow six
    # NOTE(review): youtube_dl, ipywidgets, tqdm and ffmpeg-python are not
    # imported in the visible cells; presumably used for sample collection.
    !pip install youtube_dl
    !pip install ipywidgets
    # pydub supplies AudioSegment, used to decode the audio samples.
    !pip install pydub
    !pip install tqdm
    !pip install ffmpeg-python
    # System-level ffmpeg binary (Debian/Ubuntu package manager).
    !apt-get install ffmpeg
#!python vggish_train_demo.py --num_batches 50 --train_vggish=False --checkpoint './vggish_model.ckpt'

from __future__ import print_function

from random import shuffle

import numpy as np
import tensorflow as tf
import os
import vggish_input
import vggish_params
import vggish_slim
from pydub import AudioSegment
from audioUtils import readFolder

# Short alias for TF-Slim; train() uses it to build the fully connected layers.
slim = tf.contrib.slim

In [10]:
def getFilePathsForClass(c, max):
    """List the audio file paths stored for class `c`.

    The layout walked is ``samples/<c>/<dir>/out/<file>``. Only the first
    `max` entries returned by `readFolder` for ``samples/<c>`` are visited;
    passing ``None`` visits all of them.

    Returns:
        A list of path strings, in the order `readFolder` reports them.
    """
    selected_dirs = readFolder('samples/%s' % (c))[:max]
    return [
        'samples/%s/%s/out/%s' % (c, sub_dir, file_name)
        for sub_dir in selected_dirs
        for file_name in readFolder('samples/%s/%s/out' % (c, sub_dir))
    ]
            
def getSampleForFile(file):
    """Decode the audio file at `file` with pydub and return its samples.

    Returns whatever ``AudioSegment.get_array_of_samples()`` yields for the
    decoded segment.
    """
    segment = AudioSegment.from_file(file)
    return segment.get_array_of_samples()

# Accepts a numpy array holding the samples of a single audio file, or of
# several files concatenated together.
def getFileAsVggishInput(sample, sample_rate=44100):
    """Convert a raw waveform into VGGish input examples.

    Args:
        sample: the waveform samples to convert.
        sample_rate: sampling rate of `sample` in Hz. Defaults to 44100, the
            value that used to be hard-coded, so existing callers are
            unaffected; pass another rate for audio recorded differently.

    Returns:
        The result of ``vggish_input.waveform_to_examples``; callers in this
        notebook read ``.shape[0]`` from it as the number of examples.
    """
    return vggish_input.waveform_to_examples(sample, sample_rate)

# Join every audio file into one long waveform and convert it in one pass.
def getSamplesForFiles(files):
    """Load `files`, join their samples end to end and convert to VGGish input.

    Args:
        files: iterable of audio file paths, read with `getSampleForFile`.

    Returns:
        The result of `getFileAsVggishInput` applied to the joined waveform.
    """
    # Start from an empty float array so the joined waveform has the same
    # dtype as before and an empty `files` still yields an empty array.
    chunks = [np.array([])]
    # Collect the flattened per-file samples and concatenate once, instead of
    # re-copying the whole growing buffer for every file with np.append.
    chunks.extend(np.ravel(getSampleForFile(file)) for file in files)
    return getFileAsVggishInput(np.concatenate(chunks))

def getData(files, arr):
    """Build the examples for `files` and one copy of `arr` per example.

    Args:
        files: audio file paths handed to `getSamplesForFiles`.
        arr: label vector to repeat for every example.

    Returns:
        ``(examples, labels)`` where `labels` is a numpy array holding
        ``examples.shape[0]`` copies of `arr`.
    """
    examples = getSamplesForFiles(files)
    example_count = examples.shape[0]
    labels = np.array([arr for _ in range(example_count)])
    return examples, labels

def getOneHot(class_num, idx):
    """Return a length-`class_num` vector that is 1 at `idx` and 0 elsewhere."""
    one_hot = np.zeros(class_num)
    one_hot[idx] = 1
    return one_hot

def getSamples(classes, shuf = True, num = None):
    """Collect labelled examples for every class in `classes`.

    Each class gets a one-hot label whose hot position is the class's index
    in `classes`.

    Args:
        classes: class names; each maps to a folder read by
            `getFilePathsForClass`.
        shuf: when true, shuffle the (example, label) pairs together.
        num: forwarded to `getFilePathsForClass` as its per-class limit.

    Returns:
        ``(features, labels)``: two parallel lists, one entry per example.
    """
    print('collecting samples')
    per_class = [
        getData(getFilePathsForClass(class_name, num),
                getOneHot(len(classes), position))
        for position, class_name in enumerate(classes)
    ]

    example_rows = np.concatenate([examples for (examples, _) in per_class])
    label_rows = np.concatenate([labels for (_, labels) in per_class])

    # Pair rows up before shuffling so each example keeps its own label.
    pairs = list(zip(example_rows, label_rows))
    if shuf:
        shuffle(pairs)

    features = [pair[0] for pair in pairs]
    labels = [pair[1] for pair in pairs]
    return (features, labels)

def train(get_examples, num, _NUM_BATCHES = 50):
    """Fine-tune VGGish plus a small classifier head and return predictions.

    Builds a fresh graph with VGGish followed by a 100-unit fully connected
    layer and a 2-output sigmoid layer, restores the pre-trained weights from
    './vggish_model.ckpt', runs `_NUM_BATCHES` training steps and writes a
    checkpoint to './model/model_<num>' after every step.

    Args:
        get_examples: callable invoked as ``get_examples(num, shuf=...)`` and
            returning a ``(features, labels)`` pair. It is called once per
            training step with ``shuf=True`` and once afterwards with
            ``shuf=False``.
        num: identifier forwarded to `get_examples` and used as the suffix of
            the saved model name.
        _NUM_BATCHES: number of training steps to run.

    Returns:
        The value of the sigmoid 'prediction' tensor evaluated on the
        unshuffled examples (one row per example, one column per class).
    """
    _NUM_CLASSES = 2
    model_name_to_save = './model/model_%s' % (num)

    # Make sure the checkpoint directory exists before training starts, so the
    # save calls in the loop below have somewhere to write.
    model_dir = os.path.dirname(model_name_to_save)
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)

    with tf.Graph().as_default(), tf.Session() as sess:
        # Define VGGish; the flag answers "do we train VGGish itself?".
        embeddings = vggish_slim.define_vggish_slim(True)

        # Define a shallow classification model and associated training ops on
        # top of VGGish. The op names below are looked up by name further
        # down, so they must not change.
        with tf.variable_scope('mymodel'):
            # Add a fully connected layer with 100 units.
            num_units = 100
            fc = slim.fully_connected(embeddings, num_units)

            # Add a classifier layer at the end, consisting of parallel
            # logistic classifiers, one per class.
            logits = slim.fully_connected(
                fc, _NUM_CLASSES, activation_fn=None, scope='logits')
            pred = tf.sigmoid(logits, name='prediction')

            # Add training ops.
            with tf.variable_scope('train'):
                global_step = tf.Variable(
                    0, name='global_step', trainable=False,
                    collections=[tf.GraphKeys.GLOBAL_VARIABLES,
                                 tf.GraphKeys.GLOBAL_STEP])

            # Labels are fed as a batch of vectors with a 1 in the position of
            # each positive class label, and 0 elsewhere.
            labels = tf.placeholder(
                tf.float32, shape=(None, _NUM_CLASSES), name='labels')

            # Cross-entropy label loss.
            xent = tf.nn.sigmoid_cross_entropy_with_logits(
                logits=logits, labels=labels, name='xent')
            loss = tf.reduce_mean(xent, name='loss_op')
            tf.summary.scalar('loss', loss)

            # Adam, with the learning rate and epsilon from vggish_params.
            optimizer = tf.train.AdamOptimizer(
                learning_rate=vggish_params.LEARNING_RATE,
                epsilon=vggish_params.ADAM_EPSILON)
            optimizer.minimize(loss, global_step=global_step, name='train_op')

        # Initialize all variables in the model, and then load the pre-trained
        # VGGish checkpoint.
        sess.run(tf.global_variables_initializer())
        vggish_slim.load_vggish_slim_checkpoint(sess, './vggish_model.ckpt')

        # Locate all the tensors and ops we need for the training loop.
        features_tensor = sess.graph.get_tensor_by_name(
            vggish_params.INPUT_TENSOR_NAME)
        labels_tensor = sess.graph.get_tensor_by_name('mymodel/labels:0')
        global_step_tensor = sess.graph.get_tensor_by_name(
            'mymodel/train/global_step:0')
        loss_tensor = sess.graph.get_tensor_by_name('mymodel/loss_op:0')
        train_op = sess.graph.get_operation_by_name('mymodel/train_op')

        # Build the saver once: constructing it inside the loop would add a
        # new set of save/restore ops to the graph on every step.
        saver = tf.train.Saver()

        # The training loop. Loop-local names are kept distinct from the graph
        # tensors above so nothing is shadowed.
        for _ in range(_NUM_BATCHES):
            (batch_features, batch_labels) = get_examples(num, shuf=True)
            [num_steps, loss_value, _] = sess.run(
                [global_step_tensor, loss_tensor, train_op],
                feed_dict={features_tensor: batch_features,
                           labels_tensor: batch_labels})
            print('Step %d: loss %g' % (num_steps, loss_value))
            # Checkpoint after every step, as before.
            saver.save(sess, model_name_to_save)

        # Score the unshuffled examples with the freshly trained model.
        (eval_features, _) = get_examples(num, shuf=False)
        return sess.run(pred, feed_dict={features_tensor: eval_features})

def getSavedSamples(num, shuf = True):
    """Load the cached features and labels saved under identifier `num`.

    Reads 'checkpoints/features_<num>.npy' and 'checkpoints/labels_<num>.npy'.

    Args:
        num: suffix identifying which cached pair of files to load.
        shuf: when true, shuffle the (feature, label) pairs together.

    Returns:
        ``(features, labels)``: two parallel lists, one entry per example.
    """
    saved_features = np.load('checkpoints/features_%s.npy' % (num))
    saved_labels = np.load('checkpoints/labels_%s.npy' % (num))

    # Pair rows up before shuffling so each feature keeps its own label.
    pairs = list(zip(saved_features, saved_labels))
    if shuf:
        shuffle(pairs)

    return ([pair[0] for pair in pairs], [pair[1] for pair in pairs])

def trainAndSaveAndPredict(number_of_samples = 1, epochs = 5):
    """Cache the samples if needed, train on them, and print the predictions.

    If either 'checkpoints/features_<n>.npy' or 'checkpoints/labels_<n>.npy'
    is missing, the 'laughter' / 'notlaughter' samples are collected
    (unshuffled) and saved to those files first. The model is then trained
    from the cached files, and the raw predictions and their per-row argmax
    are printed.

    Args:
        number_of_samples: per-class limit forwarded to `getSamples`, and the
            suffix of the cache files and of the saved model.
        epochs: number of training steps forwarded to `train`.

    Returns:
        None; results are printed.
    """
    features_name = 'checkpoints/features_%s.npy' % (number_of_samples)
    labels_name = 'checkpoints/labels_%s.npy' % (number_of_samples)

    if not os.path.isfile(features_name) or not os.path.isfile(labels_name):
        print('no files saved for %s' % number_of_samples)
        (features, labels) = getSamples(['laughter', 'notlaughter'], shuf = False, num = number_of_samples)
        # Create the cache directory on first use so the saves cannot fail on
        # a fresh checkout.
        cache_dir = os.path.dirname(features_name)
        if not os.path.isdir(cache_dir):
            os.makedirs(cache_dir)
        # Reuse the names checked above so the check and the write can never
        # drift apart.
        np.save(features_name, features)
        np.save(labels_name, labels)

    preds = train(getSavedSamples, number_of_samples, epochs)

    print(preds)
    # `preds` is already a concrete array, so take the argmax with NumPy
    # rather than opening a second TensorFlow graph and session for it.
    print(np.argmax(preds, axis=1))

# Run the whole pipeline: up to 3 sample folders per class, 5 training steps.
trainAndSaveAndPredict(number_of_samples = 3, epochs = 5)

collecting samples
INFO:tensorflow:Restoring parameters from ./vggish_model.ckpt
Step 1: loss 0.845144
Step 2: loss 0.744279
Step 3: loss 0.680981
Step 4: loss 0.648908
Step 5: loss 0.619371
[[0.38603973 0.5640656 ]
 [0.3811315  0.5746864 ]
 [0.38298747 0.57671666]
 ...
 [0.3830327  0.5694682 ]
 [0.3824789  0.5744395 ]
 [0.38392663 0.56973225]]
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 