Implement

keik · Aug 19, 2016 · e7616e5 · e7616e5
1 parent 638c9aa
commit e7616e5
Show file tree

Hide file tree

Showing 10 changed files with 243 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,6 @@
+dist
+.coverage
+*.pyc
+.cache
+__pycache__
+*.egg-info
diff --git a/.travis.yml b/.travis.yml
@@ -0,0 +1,11 @@
+# Travis CI configuration: run the Makefile on Python 3.5 and report coverage.
+language: python
+python:
+  - 3.5
+addons:
+  apt:
+    packages:
+      # system package installed before the build
+      # (presumably required by a dependency of audiotrans; verify)
+      - portaudio19-dev
+script:
+  # runs the Makefile's default target
+  - make
+after_success:
+  # upload coverage only when the build succeeded
+  pip install python-coveralls && coveralls
diff --git a/Makefile b/Makefile
@@ -0,0 +1,25 @@
+# Banner printed around each target name (green "### target ###").
+TAG="\n\n\033[0;32m\#\#\# "
+END=" \#\#\# \033[0m\n"
+# Name of this package, used for coverage and for uninstalling itself.
+SELF="audiotrans_transform_istft"
+# Requirements file listing the development dependencies.
+DEV_DEPS="requirements-dev.txt"

+# None of the targets produce a file of the same name, so declare them phony;
+# otherwise a file or directory called e.g. "test" would stop the rule from running.
+.PHONY: test test-all init uninstall-all uninstall-self

+# Default target: lint with flake8, then run the test suite with coverage.
+test: init
	@echo $(TAG)$@$(END)
	flake8
	py.test tests/tests.py --cov $(SELF) --verbose

+# Run the tests after removing every development dependency first.
+test-all: uninstall-all test
	@echo

+# (Re)install the development dependencies and this package in editable mode.
+init: uninstall-self
	@echo $(TAG)$@$(END)
	pip install --upgrade -r $(DEV_DEPS)
	pip install --upgrade --editable .

+# Remove the development dependencies; failures are ignored.
+uninstall-all: uninstall-self
	@echo $(TAG)$@$(END)
	- pip uninstall --yes -r $(DEV_DEPS) 2>/dev/null

+# Remove this package; failures are ignored.
+uninstall-self:
	@echo $(TAG)$@$(END)
	- pip uninstall --yes $(SELF) 2>/dev/null
diff --git a/audiotrans_transform_istft/__init__.py b/audiotrans_transform_istft/__init__.py
@@ -0,0 +1,6 @@
+from .__main__ import ISTFTTransform
+
+
+# Public API of the package: the transform class re-exported from __main__.
+__all__ = [
+    'ISTFTTransform'
+]
diff --git a/audiotrans_transform_istft/__main__.py b/audiotrans_transform_istft/__main__.py
@@ -0,0 +1,79 @@
+# -*- coding: utf-8 -*-
+
+from argparse import ArgumentParser, RawTextHelpFormatter
+import numpy as np
+from logging import getLogger, StreamHandler, Formatter, DEBUG
+from audiotrans import Transform
+
+# Package-level logger; a stream handler with a timestamped format is attached
+# at import time. The level is only raised to DEBUG when the transform is
+# constructed with the verbose option.
+logger = getLogger(__package__)
+handler = StreamHandler()
+handler.setFormatter(Formatter('[%(asctime)s %(levelname)s %(name)s] %(message)s'))
+logger.addHandler(handler)
+
+
+class ISTFTTransform(Transform):
+
+    def __init__(self, argv=[]):
+        parser = ArgumentParser(
+            prog='istft',
+            description="""audiotrans transform module for Short-Time Fourier Transformation (ISTFT)
+
+Transform wave array as np.ndarray shaped (1,) to ISTFT matrix as
+np.ndarray shaped (1 + widnow_size/2, (len(wave) - window_size) / hop-size + 1).""",
+            formatter_class=RawTextHelpFormatter)
+
+        parser.add_argument('-v', '--verbose', dest='verbose',
+                            action='store_true',
+                            help='Run as verbose mode')
+
+        parser.add_argument('-H', '--hop-size', dest='hop_size', default='256',
+                            help='Hop size to FFT. Default is 256')
+
+        args = parser.parse_args(argv)
+
+        if args.verbose:
+            logger.setLevel(DEBUG)
+            logger.info('Start as verbose mode')
+
+        self.window_size = None
+        self.hop_size = int(args.hop_size)
+        self.prev_remixed = np.zeros(0)
+        self.prev_win_sum = np.zeros(0)
+
+    def transform(self, stft_matrix):
+
+        # restore symmetoric spectrum
+        stft_matrix = np.concatenate((stft_matrix, stft_matrix[-2:0:-1].conj()), 0)
+
+        if self.window_size is None:
+            self.window_size = stft_matrix.shape[0]
+            self.win = np.hamming(self.window_size)
+            self.win_sqr = self.win ** 2
+
+        cols = stft_matrix.shape[1]
+
+        # initialize buffer for remixed wave and win with zeros
+        x = np.zeros(self.window_size + (cols - 1) * self.hop_size)
+        win_sum = np.zeros(self.window_size + (cols - 1) * self.hop_size)
+
+        # sum previous remixed wave and window
+        x[:len(self.prev_remixed)] += self.prev_remixed
+        win_sum[:len(self.prev_win_sum)] += self.prev_win_sum
+
+        # ISTFT
+        for i in range(cols):
+            offset = i * self.hop_size
+            x[offset:offset + self.window_size] += np.fft.ifft(stft_matrix[:, i]).real * self.win
+            win_sum[offset:offset + self.window_size] += self.win_sqr
+
+        # split remixed to returns and cache for next
+        x, self.prev_remixed = np.split(x, [cols * self.hop_size])
+        win_sum, self.prev_win_sum = np.split(win_sum, [cols * self.hop_size])
+
+        nonzero = win_sum > np.spacing(1)
+        x[nonzero] /= win_sum[nonzero]
+
+        logger.info('ISTFT from {} form STFT matrix to {} form wave'
+                    .format(stft_matrix.shape, x.shape))
+
+        return x
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -0,0 +1,4 @@
+flake8
+pytest
+pytest-cov
+audiotrans-transform-stft
diff --git a/setup.cfg b/setup.cfg
@@ -0,0 +1,2 @@
+[flake8]
+max-line-length = 99
diff --git a/setup.py b/setup.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from setuptools import setup, find_packages
+
+install_requires = [
+    'numpy',
+    'audiotrans'
+]
+
+setup(name='audiotrans-transform-istft',
+      version='0.1.0.dev1',
+      description="""audiotrans transform module to Inverse Short-Time Fourier Transformation (ISTFT)""",  # NOQA
+      author='keik',
+      author_email='k4t0.kei@gmail.com',
+      url='https://github.com/keik/audiotrans-transform-istft',
+      license='MIT',
+      classifiers=[
+          'License :: OSI Approved :: MIT License',
+          'Intended Audience :: Developers',
+          'Topic :: Multimedia :: Sound/Audio :: Conversion',
+          'Programming Language :: Python',
+          'Programming Language :: Python :: 3',
+          'Programming Language :: Python :: 3.5',
+      ],
+      packages=find_packages(),
+      install_requires=install_requires)
diff --git a/tests/fixture/drums+bass.wav b/tests/fixture/drums+bass.wav
diff --git a/tests/tests.py b/tests/tests.py
@@ -0,0 +1,83 @@
+# -*- coding: utf-8 -*-
+
+import pytest
+import wave
+import numpy as np
+from audiotrans_transform_stft import STFTTransform
+from audiotrans_transform_istft import ISTFTTransform
+
+
+def test_accept_arg_of_verbose():
+    ISTFTTransform(['-v'])  # no error would be raised
+
+
+def test_accept_args_of_window_and_hop_sizes():
+
+    tr = ISTFTTransform('-H 256'.split())
+    stft_matrix = np.random.rand(513, 1)
+    transformed = tr.transform(stft_matrix)
+    print(transformed.shape)
+    assert transformed.shape == (256,)
+
+    tr = ISTFTTransform('-H 256'.split())
+    stft_matrix = np.random.rand(513, 4)
+    transformed = tr.transform(stft_matrix)
+    print(transformed.shape)
+    assert transformed.shape == (256 * 4,)
+
+    tr = ISTFTTransform('-H 128'.split())
+    stft_matrix = np.random.rand(513, 4)
+    transformed = tr.transform(stft_matrix)
+    assert transformed.shape == (128 * 4,)
+
+    tr = ISTFTTransform('-H 128'.split())
+    stft_matrix = np.random.rand(513, 8)
+    transformed = tr.transform(stft_matrix)
+    assert transformed.shape == (128 * 8,)
+
+
+@pytest.mark.parametrize('buf_size, win_size, hop_size', [(1024, 1024, 256),
+                                                          (160, 100, 30)])
+def test_repeatedly_transform_should_be_connected_smoothly(buf_size, win_size, hop_size):
+
+    tr_stft = STFTTransform('-w {} -H {}'.format(win_size, hop_size).split())
+    tr_istft = ISTFTTransform('-H {}'.format(hop_size).split())
+    wf = wave.open('tests/fixture/drums+bass.wav')
+    all_data = np.fromstring(wf.readframes(wf.getnframes()), np.int16)
+    stft_matrix = np.reshape([], (win_size // 2 + 1, -1))
+    transformed = np.array([])
+    for idx, s in enumerate(range(0, len(all_data), buf_size)):
+
+        # STFT and cache STFT matrix
+        tmp = tr_stft.transform(all_data[s:s + buf_size])
+        stft_matrix = np.concatenate((stft_matrix, tmp), 1)
+
+        # ISTFT and cache ISTFT wave
+        tmp = tr_istft.transform(tmp)
+        transformed = np.concatenate((transformed, tmp))
+
+    # ISTFT on batch
+    istft_wave = istft(stft_matrix, hop_size)
+
+    # assert equal between ISTFT on batch and stream
+    assert (transformed == istft_wave[:len(transformed)]).all()
+
+
+def istft(stft_matrix, hop_size):
+    """Batch reference ISTFT used to check the streaming implementation.
+
+    Args:
+        stft_matrix: 2-D array with one frame per column holding the
+            one-sided spectrum.
+        hop_size: Number of samples between the starts of successive frames.
+
+    Returns:
+        1-D array of length ``window_size + (cols - 1) * hop_size`` holding the
+        overlap-added, window-normalized wave.
+    """
+    # restore the symmetric spectrum: append the conjugate of every row
+    # except the first and the last, in reversed order
+    stft_matrix = np.concatenate((stft_matrix, stft_matrix[-2:0:-1].conj()), 0)
+    window_size, cols = stft_matrix.shape

+    win = np.hamming(window_size)
+    win_square = win ** 2

+    x = np.zeros(window_size + (cols - 1) * hop_size)
+    win_sum = np.zeros(window_size + (cols - 1) * hop_size)
+    # overlap-add each windowed inverse FFT at its hop offset
+    for col in range(cols):
+        s = col * hop_size
+        x[s:s + window_size] += (np.fft.ifft(stft_matrix[:, col]).real * win)
+        win_sum[s:s + window_size] += win_square

+    # normalize by the summed squared window, skipping near-zero sums
+    nonzero_indices = win_sum > np.spacing(1)
+    x[nonzero_indices] /= win_sum[nonzero_indices]

+    return x