Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ coverage.xml
# docs
_build/

# tests
*.wav

# tools
tools/**/

Expand Down
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

PROJECT := diffsptk
MODULE :=
OPT :=

PYTHON_VERSION := 3.9
TORCH_VERSION := 2.0.0
Expand Down Expand Up @@ -63,7 +64,7 @@ format: tool

test: tool
[ -n "$(MODULE)" ] && module=tests/test_$(MODULE).py || module=; \
. ./venv/bin/activate && export PATH=tools/SPTK/bin:$$PATH && python -m pytest $$module
. ./venv/bin/activate && export PATH=tools/SPTK/bin:$$PATH && python -m pytest $$module $(OPT)

test-clean:
rm -rf tests/__pycache__
Expand Down
63 changes: 62 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,41 @@ diffsptk.write("voiced.wav", x_voiced, sr)
diffsptk.write("unvoiced.wav", x_unvoiced, sr)
```

### LPC analysis and synthesis

```python
import diffsptk

fl = 400 # Frame length.
fp = 80 # Frame period.
M = 24 # LPC order.

# Read waveform.
x, sr = diffsptk.read("assets/data.wav")

# Estimate LPC of x.
frame = diffsptk.Frame(frame_length=fl, frame_period=fp)
window = diffsptk.Window(in_length=fl)
lpc = diffsptk.LPC(frame_length=fl, lpc_order=M, eps=1e-6)
a = lpc(window(frame(x)))

# Convert to inverse filter coefficients.
norm0 = diffsptk.AllPoleToAllZeroDigitalFilterCoefficients(filter_order=M)
b = norm0(a)

# Reconstruct x.
zerodf = diffsptk.AllZeroDigitalFilter(filter_order=M, frame_period=fp)
poledf = diffsptk.AllPoleDigitalFilter(filter_order=M, frame_period=fp)
x_hat = poledf(zerodf(x, b), a)

# Write reconstructed waveform.
diffsptk.write("reconst.wav", x_hat, sr)

# Compute error.
error = (x_hat - x).abs().sum()
print(error)
```

### Mel-spectrogram, MFCC, and PLP extraction

```python
Expand All @@ -108,7 +143,7 @@ x, sr = diffsptk.read("assets/data.wav")
stft = diffsptk.STFT(frame_length=fl, frame_period=fp, fft_length=n_fft)
X = stft(x)

# Extract mel-spectrogram.
# Extract log mel-spectrogram.
fbank = diffsptk.FBANK(
n_channel=n_channel,
fft_length=n_fft,
Expand Down Expand Up @@ -196,6 +231,32 @@ error = (x_hat - x).abs().sum()
print(error)
```

### Modified discrete cosine transform

```python
import diffsptk

fl = 512 # Frame length.

# Read waveform.
x, sr = diffsptk.read("assets/data.wav")

# Transform x.
mdct = diffsptk.MDCT(fl)
c = mdct(x)

# Reconstruct x.
imdct = diffsptk.IMDCT(fl)
x_hat = imdct(c, out_length=x.size(0))

# Write reconstructed waveform.
diffsptk.write("reconst.wav", x_hat, sr)

# Compute error.
error = (x_hat - x).abs().sum()
print(error)
```

### Vector quantization

```python
Expand Down
14 changes: 10 additions & 4 deletions diffsptk/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -1005,21 +1005,24 @@ def lar2par(g):
return nn.LogAreaRatioToParcorCoefficients._func(g)


def levdur(r):
def levdur(r, eps=1e-6):
"""Solve a Yule-Walker linear system.

Parameters
----------
r : Tensor [shape=(..., M+1)]
Autocorrelation.

eps : float >= 0
A small value to improve numerical stability.

Returns
-------
out : Tensor [shape=(..., M+1)]
Gain and LPC coefficients.

"""
return nn.LevinsonDurbin._func(r)
return nn.LevinsonDurbin._func(r, eps=eps)


def linear_intpl(x, upsampling_factor=80):
Expand All @@ -1042,7 +1045,7 @@ def linear_intpl(x, upsampling_factor=80):
return nn.LinearInterpolation._func(x, upsampling_factor=upsampling_factor)


def lpc(x, lpc_order):
def lpc(x, lpc_order, eps=1e-6):
"""Compute LPC coefficients.

Parameters
Expand All @@ -1053,13 +1056,16 @@ def lpc(x, lpc_order):
lpc_order : int >= 0
Order of LPC, :math:`M`.

eps : float >= 0
A small value to improve numerical stability.

Returns
-------
out : Tensor [shape=(..., M+1)]
Gain and LPC coefficients.

"""
return nn.LinearPredictiveCodingAnalysis._func(x, lpc_order=lpc_order)
return nn.LinearPredictiveCodingAnalysis._func(x, lpc_order=lpc_order, eps=eps)


def lpc2lsp(a, log_gain=False, sample_rate=None, out_format="radian"):
Expand Down
2 changes: 1 addition & 1 deletion diffsptk/modules/acorr.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def _forward(x, acr_order, norm, const):
fft_length = x.size(-1) + acr_order
if fft_length % 2 == 1:
fft_length += 1
X = torch.square(torch.fft.rfft(x, n=fft_length).abs())
X = torch.fft.rfft(x, n=fft_length).abs().square()
r = torch.fft.irfft(X)[..., : acr_order + 1] * const
if norm:
r = r / r[..., :1]
Expand Down
1 change: 1 addition & 0 deletions diffsptk/modules/acr2csm.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def forward(self, r):

@staticmethod
def _forward(r, C):
assert r.dtype == torch.double
u = torch.matmul(r, C)
u1, u2 = torch.tensor_split(u, 2, dim=-1)

Expand Down
23 changes: 18 additions & 5 deletions diffsptk/modules/levdur.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,18 @@ class LevinsonDurbin(nn.Module):
lpc_order : int >= 0
Order of LPC coefficients, :math:`M`.

eps : float >= 0
A small value to improve numerical stability.

"""

def __init__(self, lpc_order):
def __init__(self, lpc_order, eps=0):
super().__init__()

assert 0 <= lpc_order

self.lpc_order = lpc_order
self.register_buffer("eye", self._precompute(self.lpc_order, eps))

def forward(self, r):
"""Solve a Yule-Walker linear system.
Expand Down Expand Up @@ -65,14 +69,14 @@ def forward(self, r):

"""
check_size(r.size(-1), self.lpc_order + 1, "dimension of autocorrelation")
return self._forward(r)
return self._forward(r, self.eye)

@staticmethod
def _forward(r):
def _forward(r, eye):
r0, r1 = torch.split(r, [1, r.size(-1) - 1], dim=-1)

# Make Toeplitz matrix.
R = symmetric_toeplitz(r[..., :-1]) # [..., M, M]
R = symmetric_toeplitz(r[..., :-1]) + eye # [..., M, M]

# Solve system.
a = torch.matmul(R.inverse(), -r1.unsqueeze(-1)).squeeze(-1)
Expand All @@ -83,4 +87,13 @@ def _forward(r):
a = torch.cat((K, a), dim=-1)
return a

_func = _forward
@staticmethod
def _func(r, eps):
# Functional entry point: there is no registered buffer to reuse here, so
# the eps-scaled identity matrix is rebuilt on every call, matching the
# dtype and device of r.
# r.size(-1) - 1 is the LPC order M because r carries M+1 autocorrelation lags.
eye = LevinsonDurbin._precompute(
r.size(-1) - 1, eps, dtype=r.dtype, device=r.device
)
return LevinsonDurbin._forward(r, eye)

@staticmethod
def _precompute(order, eps, dtype=None, device=None):
# Return the (order x order) identity matrix scaled by eps. _forward adds
# this term to the symmetric Toeplitz autocorrelation matrix before it is
# inverted; with eps == 0 the term is an all-zero matrix.
return torch.eye(order, dtype=dtype, device=device) * eps
15 changes: 9 additions & 6 deletions diffsptk/modules/lpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

class LinearPredictiveCodingAnalysis(nn.Module):
"""See `this page <https://sp-nitech.github.io/sptk/latest/main/lpc.html>`_
for details. This module is a simple cascade of acorr and levdur.
for details. Double precision is recommended.

Parameters
----------
Expand All @@ -32,14 +32,17 @@ class LinearPredictiveCodingAnalysis(nn.Module):
lpc_order : int >= 0
Order of LPC, :math:`M`.

eps : float >= 0
A small value to improve numerical stability.

"""

def __init__(self, frame_length, lpc_order):
def __init__(self, frame_length, lpc_order, eps=1e-6):
super().__init__()

self.lpc = nn.Sequential(
Autocorrelation(frame_length, lpc_order),
LevinsonDurbin(lpc_order),
LevinsonDurbin(lpc_order, eps=eps),
)

def forward(self, x):
Expand All @@ -59,7 +62,7 @@ def forward(self, x):
--------
>>> x = diffsptk.nrand(4)
tensor([ 0.8226, -0.0284, -0.5715, 0.2127, 0.1217])
>>> lpc = diffsptk.LPC(2, 5)
>>> lpc = diffsptk.LPC(5, 2)
>>> a = lpc(x)
>>> a
tensor([0.8726, 0.1475, 0.5270])
Expand All @@ -68,7 +71,7 @@ def forward(self, x):
return self.lpc(x)

@staticmethod
def _func(x, lpc_order):
def _func(x, lpc_order, eps):
r = Autocorrelation._func(x, lpc_order)
a = LevinsonDurbin._func(r)
a = LevinsonDurbin._func(r, eps)
return a
4 changes: 4 additions & 0 deletions tests/test_acr2csm.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# ------------------------------------------------------------------------ #

import pytest
import torch

import diffsptk
import tests.utils as U
Expand All @@ -23,6 +24,9 @@
@pytest.mark.parametrize("device", ["cpu", "cuda"])
@pytest.mark.parametrize("module", [False, True])
def test_compatibility(device, module, M=25, L=100, B=2):
if torch.get_default_dtype() != torch.double: # pragma: no cover
return

acr2csm = U.choice(
module,
diffsptk.AutocorrelationToCompositeSinusoidalModelCoefficients,
Expand Down
4 changes: 4 additions & 0 deletions tests/test_csm2acr.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# ------------------------------------------------------------------------ #

import pytest
import torch

import diffsptk
import tests.utils as U
Expand All @@ -23,6 +24,9 @@
@pytest.mark.parametrize("device", ["cpu", "cuda"])
@pytest.mark.parametrize("module", [False, True])
def test_compatibility(device, module, M=25, L=100, B=2):
if torch.get_default_dtype() != torch.double: # pragma: no cover
return

csm2acr = U.choice(
module,
diffsptk.CompositeSinusoidalModelCoefficientsToAutocorrelation,
Expand Down
4 changes: 3 additions & 1 deletion tests/test_imdct.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# ------------------------------------------------------------------------ #

import pytest
import torch

import diffsptk
import tests.utils as U
Expand All @@ -34,9 +35,10 @@ def test_compatibility(device, module, window, L=512):
mdct_params,
)

# torch.round is for float precision.
U.check_compatibility(
device,
[imdct, mdct],
[torch.round, imdct, mdct],
[],
"x2x +sd tools/SPTK/asset/data.short",
"sopr",
Expand Down
4 changes: 3 additions & 1 deletion tests/test_istft.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from operator import itemgetter

import pytest
import torch

import diffsptk
import tests.utils as U
Expand All @@ -41,9 +42,10 @@ def test_compatibility(device, module, T=19200):
stft_params,
)

# torch.round is for float precision.
U.check_compatibility(
device,
[itemgetter(slice(0, T)), istft, stft],
[torch.round, itemgetter(slice(0, T)), istft, stft],
[],
"x2x +sd tools/SPTK/asset/data.short",
"sopr",
Expand Down
7 changes: 5 additions & 2 deletions tests/test_lpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@
import tests.utils as U


@pytest.mark.parametrize("device", ["cpu", "cuda"])
@pytest.mark.parametrize("module", [False, True])
def test_compatibility(module, M=14, L=30, B=2):
def test_compatibility(device, module, M=14, L=30, B=2):
lpc = U.choice(
module,
diffsptk.LPC,
Expand All @@ -31,7 +32,7 @@ def test_compatibility(module, M=14, L=30, B=2):
)

U.check_compatibility(
"cpu",
device,
lpc,
[],
f"nrand -l {B*L}",
Expand All @@ -40,3 +41,5 @@ def test_compatibility(module, M=14, L=30, B=2):
dx=L,
dy=M + 1,
)

U.check_differentiability(device, lpc, [B, L])
Loading