# Load functions from openl3/cli.py

In [1]:
# %load ~/dev/openl3/openl3/cli.py
from __future__ import print_function
import os
import sys
from openl3.openl3_exceptions import OpenL3Error
from openl3 import process_file
from argparse import ArgumentParser, RawDescriptionHelpFormatter, ArgumentTypeError
from collections import Iterable
from six import string_types


def positive_float(value):
    """An argparse type method for accepting only positive floats.

    Parameters
    ----------
    value : str or number
        Raw value to convert; anything accepted by ``float()``.

    Returns
    -------
    fvalue : float
        The converted value, guaranteed to be strictly greater than zero.

    Raises
    ------
    ArgumentTypeError
        If ``value`` cannot be converted to a float, or if the converted
        value is zero, negative or NaN.
    """
    try:
        fvalue = float(value)
    except (ValueError, TypeError) as e:
        raise ArgumentTypeError('Expected a positive float, error message: '
                                '{}'.format(e))
    # Written as "not > 0" rather than "<= 0": every comparison with NaN is
    # False, so "nan <= 0" would let NaN through as a "positive" value.
    if not fvalue > 0:
        raise ArgumentTypeError('Expected a positive float')
    return fvalue


def get_file_list(input_list):
    """Expand a collection of file/directory paths into a flat list of files.

    Parameters
    ----------
    input_list : iterable of str
        Paths to files and/or directories. A directory is expanded one level
        deep (no recursion); anything inside it that is not a regular file
        is skipped.

    Returns
    -------
    file_list : list of str
        Absolute paths of the files found, following the order of
        ``input_list``. Files taken from a directory appear in sorted
        name order.

    Raises
    ------
    ArgumentTypeError
        If ``input_list`` is a string or is not iterable.
    OpenL3Error
        If an item is neither an existing file nor an existing directory.
    """
    if not isinstance(input_list, Iterable) or isinstance(input_list, string_types):
        raise ArgumentTypeError('input_list must be iterable (and not string)')
    file_list = []
    for item in input_list:
        if os.path.isfile(item):
            file_list.append(os.path.abspath(item))
        elif os.path.isdir(item):
            # os.listdir() returns entries in arbitrary order; sorting makes
            # the processing order reproducible across runs and platforms.
            for fname in sorted(os.listdir(item)):
                path = os.path.join(item, fname)
                if os.path.isfile(path):
                    # Absolute, to match the paths of files given directly.
                    file_list.append(os.path.abspath(path))
        else:
            raise OpenL3Error('Could not find {}'.format(item))

    return file_list


def run(inputs, output_dir=None, suffix=None, input_repr="mel256", content_type="music",
        embedding_size=6144, center=True, hop_size=0.1, verbose=False):
    """
    Computes and saves L3 embedding for given inputs.

    Every input is expanded with ``get_file_list`` and each resulting file
    is handed to ``process_file`` together with the embedding options.

    Parameters
    ----------
    inputs : list of str, or str
        File/directory path or list of file/directory paths to be processed
    output_dir : str or None
        Path to directory for saving output files. If None, output files will
        be saved to the directory containing the input file.
    suffix : str or None
        String to be appended to the output filename, i.e.
        <base filename>_<suffix>. If None, then no suffix will be added.
        The output file extension is chosen by ``process_file``.
    input_repr : "linear", "mel128", or "mel256"
        Spectrogram representation used for model.
    content_type : "music" or "env"
        Type of content used to train embedding.
    embedding_size : 6144 or 512
        Embedding dimensionality.
    center : boolean
        If True, pads beginning of signal so timestamps correspond
        to center of window.
    hop_size : float
        Hop size in seconds.
    verbose : boolean
        If True, print progress messages to stdout. The flag is also
        forwarded to ``process_file``.

    Returns
    -------
    None

    Raises
    ------
    OpenL3Error
        If ``inputs`` is neither a string nor an iterable, or if a given
        path does not exist (raised by ``get_file_list``).
    SystemExit
        With exit status -1 if no files were found.
    """

    if isinstance(inputs, string_types):
        # Route a lone path through get_file_list too, so that a directory
        # passed as a plain string is expanded into its files instead of
        # being handed to process_file as if it were an audio file.
        file_list = get_file_list([inputs])
    elif isinstance(inputs, Iterable):
        file_list = get_file_list(inputs)
    else:
        raise OpenL3Error('Invalid input: {}'.format(str(inputs)))

    if not file_list:
        print('openl3: No WAV files found in {}. Aborting.'.format(str(inputs)))
        sys.exit(-1)

    # Process all files in the arguments
    for filepath in file_list:
        if verbose:
            print('openl3: Processing: {}'.format(filepath))
        process_file(filepath,
                     output_dir=output_dir,
                     suffix=suffix,
                     input_repr=input_repr,
                     content_type=content_type,
                     embedding_size=embedding_size,
                     center=center,
                     hop_size=hop_size,
                     verbose=verbose)
    if verbose:
        print('openl3: Done!')


def parse_args(args):
    """Parse the command-line arguments of the openl3 CLI.

    Parameters
    ----------
    args : list of str
        Argument tokens without the program name, e.g. ``sys.argv[1:]``.

    Returns
    -------
    argparse.Namespace
        Namespace with the attributes ``inputs``, ``output_dir``, ``suffix``,
        ``input_repr``, ``content_type``, ``embedding_size``,
        ``no_centering``, ``hop_size`` and ``quiet``.
    """
    # The description shown by --help is taken verbatim from main's docstring.
    parser = ArgumentParser(sys.argv[0], description=main.__doc__,
                            formatter_class=RawDescriptionHelpFormatter)

    parser.add_argument('inputs', nargs='+',
                        help='Path or paths to files to process, or path to '
                             'a directory of files to process.')

    parser.add_argument('--output-dir', '-o', default=None,
                        help='Directory to save the output file(s); '
                             'if not given, the output will be '
                             'saved to the same directory as the input WAV '
                             'file(s).')

    parser.add_argument('--suffix', '-x', default=None,
                        help='String to append to the output filenames. '
                             'If not provided, no suffix is added.')

    parser.add_argument('--input-repr', '-i', default='mel256',
                        choices=['linear', 'mel128', 'mel256'],
                        help='String specifying the time-frequency input '
                             'representation for the embedding model.')

    parser.add_argument('--content-type', '-c', default='music',
                        choices=['music', 'env'],
                        help='Content type used to train embedding model.')

    parser.add_argument('--embedding-size', '-s', type=int, default=6144,
                        help='Embedding dimensionality.')

    # Negative flag: main() inverts it into run()'s ``center`` keyword.
    parser.add_argument('--no-centering', '-n', action='store_true', default=False,
                        help='Do not pad signal; timestamps will correspond to '
                             'the beginning of each analysis window.')

    parser.add_argument('--hop-size', '-t', type=positive_float, default=0.1,
                        help='Hop size in seconds for processing audio files.')

    # Negative flag: main() inverts it into run()'s ``verbose`` keyword.
    parser.add_argument('--quiet', '-q', action='store_true', default=False,
                        help='Suppress all non-error messages to stdout.')

    return parser.parse_args(args)


def main():
    """
    Extracts audio embeddings from models based on the Look, Listen, and Learn models (Arandjelovic and Zisserman 2017).
    """
    # NOTE: the docstring above is also the --help description (parse_args
    # passes main.__doc__ to ArgumentParser), so editing it changes
    # user-facing output.
    cli = parse_args(sys.argv[1:])

    # Map the parsed options onto run()'s keywords; the two negative CLI
    # flags (--no-centering, --quiet) are inverted into positive ones.
    options = {
        'output_dir': cli.output_dir,
        'suffix': cli.suffix,
        'input_repr': cli.input_repr,
        'content_type': cli.content_type,
        'embedding_size': cli.embedding_size,
        'center': not cli.no_centering,
        'hop_size': cli.hop_size,
        'verbose': not cli.quiet,
    }
    run(cli.inputs, **options)


Using TensorFlow backend.


# Set paths

In [2]:
# path to wav file
# Root of the openl3 test tree under the user's home directory; the
# regression reference files loaded further down are located relative to it.
test_path = os.path.expanduser('~/dev/openl3/tests/')
# Audio file used as input by both run() calls below.
chirp44_path = os.path.join(test_path, 'data', 'audio', 'chirp_44k.wav')

In [3]:
# path to store output embeddings
# Passed as output_dir to run(); the comparison cells later read the newly
# computed embeddings back from this directory.
output_dir = os.path.expanduser('~/openl3_output/')

# Compute embeddings

In [4]:
# compute mel256/music/6144 regression embedding

# Settings for this run; no suffix, so the output keeps the base file name.
suffix = None
input_repr = 'mel256'
content_type = 'music'
embedding_size = 6144
center = True
hop_size = 0.1
verbose = False

# Compute the embedding for the chirp file and save it under output_dir.
run(
    chirp44_path,
    output_dir=output_dir,
    suffix=suffix,
    input_repr=input_repr,
    content_type=content_type,
    embedding_size=embedding_size,
    center=center,
    hop_size=hop_size,
    verbose=verbose,
)

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [5]:
# compute linear/env/512 regression embedding

# Settings for this run; the 'linear' suffix keeps its output apart from
# the output of the previous run on the same input file.
suffix = 'linear'
input_repr = 'linear'
content_type = 'env'
embedding_size = 512
center = False
hop_size = 0.5
verbose = False

# Compute the embedding for the chirp file and save it under output_dir.
run(
    chirp44_path,
    output_dir=output_dir,
    suffix=suffix,
    input_repr=input_repr,
    content_type=content_type,
    embedding_size=embedding_size,
    center=center,
    hop_size=hop_size,
    verbose=verbose,
)

# OPTIONAL: compare to previous regression data

In [7]:
import numpy as np

In [8]:
# Reference ("regression") embeddings stored with the test data.
reg_emb_path = os.path.join(test_path, 'data', 'regression', 'chirp_44k.npz')
reg_emb_linear_path = os.path.join(test_path, 'data', 'regression', 'chirp_44k_linear.npz')

# Newly computed counterparts in output_dir; these files are expected to
# have been written by the two run() calls above (no suffix / 'linear').
new_emb_path = os.path.join(output_dir, 'chirp_44k.npz')
new_emb_linear_path = os.path.join(output_dir, 'chirp_44k_linear.npz')

In [9]:
# Load the stored reference archives ...
reg_emb = np.load(reg_emb_path)
reg_emb_linear = np.load(reg_emb_linear_path)

# ... and the newly generated ones. The cells below compare their
# 'timestamps' and 'embedding' entries.
new_emb = np.load(new_emb_path)
new_emb_linear = np.load(new_emb_linear_path)

In [10]:
# Timestamps of the new chirp_44k output must match the stored reference
# (np.allclose default tolerances).
assert np.allclose(reg_emb['timestamps'], new_emb['timestamps'])

In [11]:
# Embedding values of the new chirp_44k output must agree with the reference
# within rtol=1e-05 / atol=1e-06; NaNs are not treated as equal.
assert np.allclose(reg_emb['embedding'], new_emb['embedding'], rtol=1e-05, atol=1e-06, equal_nan=False)

In [12]:
# Timestamps of the new chirp_44k_linear output must match the stored
# reference (np.allclose default tolerances).
assert np.allclose(reg_emb_linear['timestamps'], new_emb_linear['timestamps'])

In [13]:
# Embedding values of the new chirp_44k_linear output must agree with the
# reference within rtol=1e-05 / atol=1e-06; NaNs are not treated as equal.
assert np.allclose(reg_emb_linear['embedding'], new_emb_linear['embedding'], rtol=1e-05, atol=1e-06, equal_nan=False)