Import torchaudio 20200528

Summary: Import Up to #664 Reviewed By: cpuhrsch Differential Revision: D21728204 fbshipit-source-id: 648dd622087fa762194ca5f89a310500e777263d
pytorch · May 28, 2020 · e065a0f · e065a0f
1 parent 0d99b9d
commit e065a0f
Show file tree

Hide file tree

Showing 49 changed files with 1,517 additions and 687 deletions.
diff --git a/.use_external_sox b/.use_external_sox
diff --git a/README.md b/README.md
@@ -13,7 +13,7 @@ having all the computations be through Pytorch operations which makes it easy
 to use and feel like a natural extension.
 
 - [Support audio I/O (Load files, Save files)](http://pytorch.org/audio/)
-  - Load the following formats into a torch Tensor using sox
+  - Load the following formats into a torch Tensor using SoX
     - mp3, wav, aac, ogg, flac, avr, cdda, cvs/vms,
     - aiff, au, amr, mp2, mp4, ac3, avi, wmv,
     - mpeg, ircam and any other format supported by libsox.
@@ -98,10 +98,29 @@ Quick Usage
 
 ```python
 import torchaudio
-waveform, sample_rate = torchaudio.load('foo.mp3')  # load tensor from file
-torchaudio.save('foo_save.mp3', waveform, sample_rate)  # save tensor to file
+
+waveform, sample_rate = torchaudio.load('foo.wav')  # load tensor from file
+torchaudio.save('foo_save.wav', waveform, sample_rate)  # save tensor to file
 ```
 
+Backend Dispatch
+----------------
+
+On macOS and Linux, torchaudio uses SoX as the default backend to load and save files.
+The backend can be changed to [SoundFile](https://pysoundfile.readthedocs.io/en/latest/)
+as shown below. See the [SoundFile documentation](https://pysoundfile.readthedocs.io/en/latest/)
+for installation instructions.
+
+```python
+import torchaudio
+torchaudio.set_audio_backend("soundfile")  # switch backend
+
+waveform, sample_rate = torchaudio.load('foo.wav')  # load tensor from file, as usual
+torchaudio.save('foo_save.wav', waveform, sample_rate)  # save tensor to file, as usual
+```
+
+Unlike SoX, SoundFile does not currently support mp3.
+
 API Reference
 -------------
 

diff --git a/build_tools/__init__.py b/build_tools/__init__.py
diff --git a/build_tools/setup_helpers/__init__.py b/build_tools/setup_helpers/__init__.py
@@ -0,0 +1 @@
+from .extension import *  # noqa
diff --git a/build_tools/setup_helpers/build_third_party.sh b/build_tools/setup_helpers/build_third_party.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Build the third party libraries into `<repo_root>/third_party/build`, or into
+# `<given_prefix>/third_party/build` when a prefix is given as the first argument.
+
+set -e
+
+this_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+root_dir="${this_dir}/../.."
+
+# A missing or empty first argument falls back to the repository root.
+prefix="${1:-${root_dir}}"
+
+tp_dir="${prefix}/third_party"
+tmp_dir="${tp_dir}/tmp"
+build_dir="${tp_dir}/build"
+
+mkdir -p "${tmp_dir}" "${build_dir}"
+
+# Defines the found_<name> and build_<name> functions called in the loop.
+. "${this_dir}/build_third_party_helper.sh"
+
+# Build only what is not installed yet. The order is kept as lame, flac, mad
+# and finally sox, which is configured against the other three.
+for component in lame flac mad sox; do
+    if ! "found_${component}" "${build_dir}" ; then
+        "build_${component}" "${tmp_dir}" "${build_dir}"
+    fi
+done
diff --git a/build_tools/setup_helpers/build_third_party_helper.sh b/build_tools/setup_helpers/build_third_party_helper.sh
@@ -0,0 +1,173 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+# Global options
+# These strings are expanded unquoted where they are used, on purpose, so that
+# every space-separated flag becomes a separate argument.
+CURL_OPTS="-L --retry 10 --connect-timeout 5 --max-time 180"
+# NOTE(review): a bare `-j` puts no upper bound on the number of parallel jobs.
+MAKE_OPTS="-j"
+CONFIG_OPTS=""
+
+# Keep the tools quiet unless DEBUG is set (to any value, even an empty one).
+# `${DEBUG+x}` expands to `x` when DEBUG is set and to nothing otherwise; it is
+# quoted so that `[ -z ... ]` always receives exactly one operand.
+if [ -z "${DEBUG+x}" ]; then
+    CURL_OPTS="${CURL_OPTS} --silent --show-error"
+    MAKE_OPTS="${MAKE_OPTS} --quiet"
+    CONFIG_OPTS="${CONFIG_OPTS} --quiet"
+fi
+
+all_found() {
+    # Check that every listed file exists as a regular file below a directory.
+    #   $1     base directory
+    #   $2...  file paths relative to the base directory
+    # Returns 0 when all files exist (or when no file was listed) and 1 as soon
+    # as one of them is missing.
+    #
+    # The variables are local so that a script sourcing this file does not get
+    # its own variables overwritten.
+    local base_dir="$1"
+    shift
+    local rel_path
+    # `for name; do` iterates over the remaining positional parameters.
+    for rel_path; do
+        if [ ! -f "${base_dir}/${rel_path}" ]; then
+            return 1
+        fi
+    done
+    return 0
+}
+
+
+found_lame() {
+    # Succeeds when the lame header and static library both exist under the prefix "$1".
+    all_found "$1" 'include/lame/lame.h' 'lib/libmp3lame.a'
+}
+
+found_flac() {
+    # Succeeds when all FLAC / FLAC++ headers and both static libraries exist
+    # under the prefix "$1".
+    local prefix_dir="$1"
+    local -a required_files=(
+        'include/FLAC/format.h'
+        'include/FLAC/stream_decoder.h'
+        'include/FLAC/export.h'
+        'include/FLAC/ordinals.h'
+        'include/FLAC/all.h'
+        'include/FLAC/assert.h'
+        'include/FLAC/callback.h'
+        'include/FLAC/metadata.h'
+        'include/FLAC/stream_encoder.h'
+        'include/FLAC++/export.h'
+        'include/FLAC++/decoder.h'
+        'include/FLAC++/all.h'
+        'include/FLAC++/metadata.h'
+        'include/FLAC++/encoder.h'
+        'lib/libFLAC++.a'
+        'lib/libFLAC.a'
+    )
+    all_found "${prefix_dir}" "${required_files[@]}"
+}
+
+found_mad() {
+    # Succeeds when the mad header and static library both exist under the prefix "$1".
+    all_found "$1" 'include/mad.h' 'lib/libmad.a'
+}
+
+found_sox() {
+    # Succeeds when the sox header and static library both exist under the prefix "$1".
+    all_found "$1" 'include/sox.h' 'lib/libsox.a'
+}
+
+build_lame() {
+    # Fetch (when not already present), build and install a static liblame.
+    #   $1  work directory holding the downloaded tarball and the source tree
+    #   $2  install prefix passed to ./configure
+    # All work happens in a subshell, so the caller's working directory is kept.
+    local work_dir="$1"
+    local install_dir="$2"
+    local package="lame-3.99.5"
+    local url="https://downloads.sourceforge.net/project/lame/lame/3.99/lame-3.99.5.tar.gz"
+    (
+        cd "${work_dir}"
+        if [ ! -d "${package}" ]; then
+            if [ ! -f "${package}.tar.gz" ]; then
+                printf "Fetching liblame from %s\n" "${url}"
+                # Download under a temporary name and rename on success, so an
+                # interrupted transfer is never mistaken for a complete tarball.
+                # CURL_OPTS is unquoted on purpose: it holds several flags.
+                curl $CURL_OPTS -o "${package}.tar.gz.part" "${url}"
+                mv "${package}.tar.gz.part" "${package}.tar.gz"
+            fi
+            tar xfp "${package}.tar.gz"
+        fi
+        # build statically
+        printf "Building liblame\n"
+        cd "${package}"
+        if [ ! -f Makefile ]; then
+            ./configure ${CONFIG_OPTS} \
+                        --disable-shared --enable-static --prefix="${install_dir}" CFLAGS=-fPIC CXXFLAGS=-fPIC \
+                        --with-pic --disable-debug --disable-dependency-tracking --enable-nasm
+        fi
+        # The build output is captured in make.log; replay it when the build
+        # fails, then exit with make's own status.
+        make ${MAKE_OPTS} > make.log 2>&1 || {
+            status=$?
+            cat make.log >&2
+            exit "${status}"
+        }
+        make ${MAKE_OPTS} install
+    )
+}
+
+build_flac() {
+    # Fetch (when not already present), build and install static FLAC libraries.
+    #   $1  work directory holding the downloaded tarball and the source tree
+    #   $2  install prefix passed to ./configure
+    # All work happens in a subshell, so the caller's working directory is kept.
+    local work_dir="$1"
+    local install_dir="$2"
+    local package="flac-1.3.2"
+    local url="https://downloads.sourceforge.net/project/flac/flac-src/flac-1.3.2.tar.xz"
+    (
+        cd "${work_dir}"
+        if [ ! -d "${package}" ]; then
+            if [ ! -f "${package}.tar.xz" ]; then
+                printf "Fetching flac from %s\n" "${url}"
+                # Download under a temporary name and rename on success, so an
+                # interrupted transfer is never mistaken for a complete tarball.
+                # CURL_OPTS is unquoted on purpose: it holds several flags.
+                curl $CURL_OPTS -o "${package}.tar.xz.part" "${url}"
+                mv "${package}.tar.xz.part" "${package}.tar.xz"
+            fi
+            tar xfp "${package}.tar.xz"
+        fi
+        # build statically
+        printf "Building flac\n"
+        cd "${package}"
+        if [ ! -f Makefile ]; then
+            ./configure ${CONFIG_OPTS} \
+                        --disable-shared --enable-static --prefix="${install_dir}" CFLAGS=-fPIC CXXFLAGS=-fPIC \
+                        --with-pic --disable-debug --disable-dependency-tracking
+        fi
+        # The build output is captured in make.log; replay it when the build
+        # fails, then exit with make's own status.
+        make ${MAKE_OPTS} > make.log 2>&1 || {
+            status=$?
+            cat make.log >&2
+            exit "${status}"
+        }
+        make ${MAKE_OPTS} install
+    )
+}
+
+build_mad() {
+    # Fetch (when not already present), build and install a static libmad.
+    #   $1  work directory holding the downloaded tarball and the source tree
+    #   $2  install prefix passed to ./configure
+    # All work happens in a subshell, so the caller's working directory is kept.
+    local work_dir="$1"
+    local install_dir="$2"
+    local package="libmad-0.15.1b"
+    local url="https://downloads.sourceforge.net/project/mad/libmad/0.15.1b/libmad-0.15.1b.tar.gz"
+    (
+        cd "${work_dir}"
+        if [ ! -d "${package}" ]; then
+            if [ ! -f "${package}.tar.gz" ]; then
+                printf "Fetching mad from %s\n" "${url}"
+                # Download under a temporary name and rename on success, so an
+                # interrupted transfer is never mistaken for a complete tarball.
+                # CURL_OPTS is unquoted on purpose: it holds several flags.
+                curl $CURL_OPTS -o "${package}.tar.gz.part" "${url}"
+                mv "${package}.tar.gz.part" "${package}.tar.gz"
+            fi
+            tar xfp "${package}.tar.gz"
+        fi
+        # build statically
+        printf "Building mad\n"
+        cd "${package}"
+        if [ ! -f Makefile ]; then
+            # Strip the hard-coded -march=i486 flag from the configure script.
+            # See https://stackoverflow.com/a/12864879/23845
+            sed -i.bak 's/-march=i486//' configure
+            ./configure ${CONFIG_OPTS} \
+                        --disable-shared --enable-static --prefix="${install_dir}" CFLAGS=-fPIC CXXFLAGS=-fPIC \
+                        --with-pic --disable-debug --disable-dependency-tracking
+        fi
+        # The build output is captured in make.log; replay it when the build
+        # fails, then exit with make's own status.
+        make ${MAKE_OPTS} > make.log 2>&1 || {
+            status=$?
+            cat make.log >&2
+            exit "${status}"
+        }
+        make ${MAKE_OPTS} install
+    )
+}
+
+build_sox() {
+    # Fetch (when not already present), build and install a static libsox that
+    # is configured against the lame, flac and mad libraries found in the
+    # install prefix.
+    #   $1  work directory holding the downloaded tarball and the source tree
+    #   $2  install prefix; also used as the include / library search path
+    # All work happens in a subshell, so the caller's working directory is kept.
+    local work_dir="$1"
+    local install_dir="$2"
+    local package="sox-14.4.2"
+    local url="https://downloads.sourceforge.net/project/sox/sox/14.4.2/sox-14.4.2.tar.bz2"
+    (
+        cd "${work_dir}"
+        if [ ! -d "${package}" ]; then
+            if [ ! -f "${package}.tar.bz2" ]; then
+                printf "Fetching SoX from %s\n" "${url}"
+                # Download under a temporary name and rename on success, so an
+                # interrupted transfer is never mistaken for a complete tarball.
+                # CURL_OPTS is unquoted on purpose: it holds several flags.
+                curl $CURL_OPTS -o "${package}.tar.bz2.part" "${url}"
+                mv "${package}.tar.bz2.part" "${package}.tar.bz2"
+            fi
+            tar xfp "${package}.tar.bz2"
+        fi
+        # build statically
+        printf "Building sox\n"
+        cd "${package}"
+        if [ ! -f Makefile ]; then
+            # --without-png makes OS X build less hazardous; somehow the build
+            # finds png and enables it.  We don't want it; we'd need to package
+            # it statically if we do.
+            ./configure ${CONFIG_OPTS} --disable-shared --enable-static --prefix="${install_dir}" \
+                        LDFLAGS="-L${install_dir}/lib" CPPFLAGS="-I${install_dir}/include" \
+                        --with-lame --with-flac --with-mad --without-alsa --without-coreaudio \
+                        --without-png --without-oggvorbis --without-oss --without-sndfile \
+                        CFLAGS=-fPIC CXXFLAGS=-fPIC --with-pic --disable-debug --disable-dependency-tracking
+        fi
+        # The build output is captured in make.log; replay it when the build
+        # fails, then exit with make's own status.
+        make ${MAKE_OPTS} > make.log 2>&1 || {
+            status=$?
+            cat make.log >&2
+            exit "${status}"
+        }
+        make ${MAKE_OPTS} install
+    )
+}
diff --git a/build_tools/setup_helpers/extension.py b/build_tools/setup_helpers/extension.py
@@ -0,0 +1,111 @@
+import platform
+import subprocess
+from pathlib import Path
+
+from torch.utils.cpp_extension import (
+    CppExtension,
+    BuildExtension as TorchBuildExtension
+)
+
+# Names exported by `from .extension import *` in the package `__init__`.
+__all__ = [
+    'get_ext_modules',
+    'BuildExtension',
+]
+
+# Directory layout, derived from the location of this file.
+_THIS_DIR = Path(__file__).parent.resolve()
+_ROOT_DIR = _THIS_DIR.parent.parent.resolve()  # two levels above this directory
+_CSRC_DIR = _ROOT_DIR / 'torchaudio' / 'csrc'
+_TP_BASE_DIR = _ROOT_DIR / 'third_party'
+_TP_INSTALL_DIR = _TP_BASE_DIR / 'build'
+
+# Temporary fix for building in fbcode
+# at the moment, we have to use external sox in fbcode
+# (the bundled dependencies are skipped when a `.use_external_sox` file exists
+# in the root directory)
+_BUILD_DEPS = not (_ROOT_DIR / '.use_external_sox').exists()
+
+
+def _get_eca(debug):
+    """Return the extra compile arguments for the extension.
+
+    Args:
+        debug: When truthy, turn optimization off and emit debug symbols;
+            otherwise request ``-O3``.
+
+    Returns:
+        A new list of compiler flags.
+    """
+    return ["-O0", "-g"] if debug else ["-O3"]
+
+
+def _get_ela(debug):
+    """Return the extra link arguments for the extension.
+
+    Args:
+        debug: When truthy, return debug flags (``/DEBUG:FULL`` on Windows,
+            ``-O0 -g`` elsewhere); otherwise return ``-O3``.
+
+    Returns:
+        A new list of linker flags.
+    """
+    if not debug:
+        return ["-O3"]
+    if platform.system() == "Windows":
+        return ["/DEBUG:FULL"]
+    return ["-O0", "-g"]
+
+
+def _get_srcs():
+    """Return the paths of all ``*.cpp`` files below the csrc directory.
+
+    The directory is searched recursively. The result is sorted because the
+    glob itself does not sort its matches, and a stable order keeps the list
+    of sources identical from one build to the next.
+
+    Returns:
+        A sorted list of path strings.
+    """
+    return sorted(str(p) for p in _CSRC_DIR.glob('**/*.cpp'))
+
+
+def _get_include_dirs():
+    """Return the include directories for the extension.
+
+    The repository root is always included. The ``include`` directory of the
+    third party install tree is added only when the dependencies are built
+    as part of this build.
+    """
+    include_dirs = [str(_ROOT_DIR)]
+    if not _BUILD_DEPS:
+        return include_dirs
+    include_dirs.append(str(_TP_INSTALL_DIR / 'include'))
+    return include_dirs
+
+
+def _get_extra_objects():
+    """Return the static libraries to link into the extension.
+
+    The list is empty when the dependencies are not built as part of this
+    build.
+    """
+    if not _BUILD_DEPS:
+        return []
+    # NOTE: The order of the libraries listed below matters.
+    #
+    # (the most important thing is that dependencies come after a library
+    # e.g., sox comes first)
+    lib_names = ('libsox.a', 'libmad.a', 'libFLAC.a', 'libmp3lame.a')
+    lib_dir = _TP_INSTALL_DIR / 'lib'
+    return [str(lib_dir / name) for name in lib_names]
+
+
+def _get_libraries():
+    """Return the libraries to link by name.
+
+    Nothing is linked by name when the dependencies are built as part of this
+    build; otherwise the extension links against ``sox``.
+    """
+    if _BUILD_DEPS:
+        return []
+    return ['sox']
+
+
+def _build_codecs():
+    """Run ``build_third_party.sh`` from this directory.
+
+    Raises:
+        subprocess.CalledProcessError: If the script exits with a non-zero
+            status.
+    """
+    script = _THIS_DIR / 'build_third_party.sh'
+    subprocess.run(args=[str(script)], check=True)
+
+
+def _configure_third_party():
+    """Prepare the third party dependencies; currently this only builds the codecs."""
+    _build_codecs()
+
+
+# Name of the C++ extension module; also used to recognize that extension at build time.
+_EXT_NAME = 'torchaudio._torchaudio'
+
+
+def get_ext_modules(debug=False):
+    """Return the extension modules to build.
+
+    Args:
+        debug: Passed on to the compile and link argument helpers to select
+            debug or optimized flags.
+
+    Returns:
+        ``None`` on Windows, otherwise a list holding the single
+        ``CppExtension`` for the torchaudio extension.
+    """
+    if platform.system() == 'Windows':
+        return None
+    extension = CppExtension(
+        _EXT_NAME,
+        _get_srcs(),
+        libraries=_get_libraries(),
+        include_dirs=_get_include_dirs(),
+        extra_compile_args=_get_eca(debug),
+        extra_objects=_get_extra_objects(),
+        extra_link_args=_get_ela(debug),
+    )
+    return [extension]
+
+
+class BuildExtension(TorchBuildExtension):
+    """Build command that prepares the third party libraries before building."""
+
+    def build_extension(self, ext):
+        """Build ``ext``, configuring the third party dependencies first when needed.
+
+        The dependencies are configured only for the torchaudio extension and
+        only when they are built as part of this build.
+        """
+        is_torchaudio_ext = ext.name == _EXT_NAME
+        if is_torchaudio_ext and _BUILD_DEPS:
+            _configure_third_party()
+        super().build_extension(ext)
diff --git a/docs/source/functional.rst b/docs/source/functional.rst
@@ -138,6 +138,11 @@ Functions to perform common audio operations.
 
 .. autofunction:: overdrive
 
+:hidden:`phaser`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autofunction:: phaser
+
 :hidden:`mask_along_axis`
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 

diff --git a/docs/source/models.rst b/docs/source/models.rst
@@ -0,0 +1,17 @@
+.. role:: hidden
+    :class: hidden-section
+
+torchaudio.models
+======================
+
+.. currentmodule:: torchaudio.models
+
+The models subpackage contains definitions of models for addressing common audio tasks.
+
+
+:hidden:`Wav2Letter`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: Wav2Letter
+
+  .. automethod:: forward