Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed issue #160 #161

Merged
merged 2 commits into from
Feb 15, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
12 changes: 9 additions & 3 deletions aeneas/adjustboundaryalgorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ def adjust(
boundary_indices,
real_wave_mfcc,
text_file,
allow_arbitrary_shift=False
):
"""
Adjust the boundaries of the text map
Expand All @@ -255,6 +256,7 @@ def adjust(
:type real_wave_mfcc: :class:`~aeneas.audiofilemfcc.AudioFileMFCC`
:param text_file: the text file containing the text fragments associated
:type text_file: :class:`~aeneas.textfile.TextFile`
:param bool allow_arbitrary_shift: if ``True``, allow arbitrary shifts when adjusting zero length fragments

:rtype: list of :class:`~aeneas.syncmap.SyncMapFragmentList`
"""
Expand All @@ -281,7 +283,7 @@ def adjust(
self.log(u" Converting boundary indices to fragment list... done")

self.log(u" Processing fragments with zero length...")
self._process_zero_length(nozero)
self._process_zero_length(nozero, allow_arbitrary_shift)
self.log(u" Processing fragments with zero length... done")

self.log(u" Processing nonspeech fragments...")
Expand Down Expand Up @@ -402,7 +404,7 @@ def append_fragment_list_to_sync_root(self, sync_root):
# NO ZERO AND LONG NONSPEECH FUNCTIONS
# #####################################################

def _process_zero_length(self, nozero):
def _process_zero_length(self, nozero, allow_arbitrary_shift):
"""
If ``nozero`` is ``True``, modify the sync map fragment list
so that no fragment will have zero length.
Expand All @@ -414,7 +416,11 @@ def _process_zero_length(self, nozero):
self.log(u"Processing zero length intervals requested")
self.log(u" Checking and fixing...")
duration = self.rconf[RuntimeConfiguration.ABA_NO_ZERO_DURATION]
self.log([u" No zero duration: %.3f", duration])
self.log([u" Requested no zero duration: %.3f", duration])
if not allow_arbitrary_shift:
self.log(u" No arbitrary shift => taking max with mws")
duration = self.rconf.mws.geq_multiple(duration)
self.log([u" Actual no zero duration: %.3f", duration])
# ignore HEAD and TAIL
max_index = len(self.smflist) - 1
self.smflist.fix_zero_length_fragments(
Expand Down
79 changes: 56 additions & 23 deletions aeneas/dtw.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,20 +155,27 @@ def __init__(
self.real_wave_mfcc = AudioFileMFCC(self.real_wave_path, rconf=self.rconf, logger=self.logger)
if (self.synt_wave_mfcc is None) and (self.synt_wave_path is not None):
self.synt_wave_mfcc = AudioFileMFCC(self.synt_wave_path, rconf=self.rconf, logger=self.logger)
self.dtw = None

def compute_accumulated_cost_matrix(self):
"""
Compute the accumulated cost matrix, and return it.

Return ``None`` if the accumulated cost matrix cannot be computed
because one of the two waves is empty after masking (if requested).

:rtype: :class:`numpy.ndarray` (2D)
:raises: RuntimeError: if both the C extension and
the pure Python code did not succeed.

.. versionadded:: 1.2.0
"""
dtw = self._setup_dtw()
self._setup_dtw()
if self.dtw is None:
self.log(u"Inner self.dtw is None => returning None")
return None
self.log(u"Returning accumulated cost matrix")
return dtw.compute_accumulated_cost_matrix()
return self.dtw.compute_accumulated_cost_matrix()

def compute_path(self):
"""
Expand All @@ -183,13 +190,19 @@ def compute_path(self):
and ``s_i`` are the indices in the synthesized wave,
and ``k`` is the length of the min cost path.

Return ``None`` if the accumulated cost matrix cannot be computed
because one of the two waves is empty after masking (if requested).

:rtype: tuple (see above)
:raises: RuntimeError: if both the C extension and
the pure Python code did not succeed.
"""
dtw = self._setup_dtw()
self._setup_dtw()
if self.dtw is None:
self.log(u"Inner self.dtw is None => returning None")
return None
self.log(u"Computing path...")
wave_path = dtw.compute_path()
wave_path = self.dtw.compute_path()
self.log(u"Computing path... done")
self.log(u"Translating path to full wave indices...")
real_indices = numpy.array([t[0] for t in wave_path])
Expand Down Expand Up @@ -230,6 +243,16 @@ def compute_boundaries(self, synt_anchors):

:rtype: :class:`numpy.ndarray` (1D)
"""
self._setup_dtw()
if self.dtw is None:
self.log(u"Inner self.dtw is None => returning artificial boundary indices")
begin = self.real_wave_mfcc.middle_begin
end = self.real_wave_mfcc.tail_begin
n = len(synt_anchors)
step = float(end - begin) / n
boundary_indices = [begin + int(i * step) for i in range(n)] + [end]
return numpy.array(boundary_indices)

self.log(u"Computing path...")
real_indices, synt_indices = self.compute_path()
self.log(u"Computing path... done")
Expand Down Expand Up @@ -276,6 +299,10 @@ def _setup_dtw(self):
"""
Set the DTW object up.
"""
# check if the DTW object has already been set up
if self.dtw is not None:
return

# check we have the AudioFileMFCC objects
if (self.real_wave_mfcc is None) or (self.real_wave_mfcc.middle_mfcc is None):
self.log_exc(u"The real wave MFCCs are not initialized", None, True, DTWAlignerNotInitialized)
Expand Down Expand Up @@ -308,26 +335,32 @@ def _setup_dtw(self):
self.log(u"Using unmasked MFCC")
real_mfcc = self.real_wave_mfcc.middle_mfcc
synt_mfcc = self.synt_wave_mfcc.middle_mfcc

# execute the selected algorithm
if algorithm == DTWAlgorithm.EXACT:
self.log(u"Computing with EXACT algo")
dtw = DTWExact(
m1=real_mfcc,
m2=synt_mfcc,
rconf=self.rconf,
logger=self.logger
)
n = real_mfcc.shape[1]
m = synt_mfcc.shape[1]
self.log([u" Number of MFCC frames in real wave: %d", n])
self.log([u" Number of MFCC frames in synt wave: %d", m])
if (n == 0) or (m == 0):
self.log(u"Setting self.dtw to None")
self.dtw = None
else:
self.log(u"Computing with STRIPE algo")
dtw = DTWStripe(
m1=real_mfcc,
m2=synt_mfcc,
delta=delta,
rconf=self.rconf,
logger=self.logger
)
return dtw
# set the selected algorithm
if algorithm == DTWAlgorithm.EXACT:
self.log(u"Computing with EXACT algo")
self.dtw = DTWExact(
m1=real_mfcc,
m2=synt_mfcc,
rconf=self.rconf,
logger=self.logger
)
else:
self.log(u"Computing with STRIPE algo")
self.dtw = DTWStripe(
m1=real_mfcc,
m2=synt_mfcc,
delta=delta,
rconf=self.rconf,
logger=self.logger
)


class DTWStripe(Loggable):
Expand Down
13 changes: 13 additions & 0 deletions aeneas/exacttiming.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from __future__ import print_function
from decimal import Decimal
from decimal import InvalidOperation
import math
import sys


Expand All @@ -63,6 +64,18 @@ def is_integer(self):
"""
return self == int(self)

def geq_multiple(self, other):
    """
    Return the smallest integer multiple of this time value
    that is greater than or equal to ``other``.
    If ``other`` is zero, return this time value
    (one whole multiple) instead of a zero multiple.

    The ceiling is computed with exact arithmetic rather than
    ``math.ceil``, which converts its argument to ``float``
    on Python 2.

    :param other: the lower bound for the returned multiple
    :type  other: :class:`~aeneas.exacttiming.TimeValue`
    :rtype: :class:`~aeneas.exacttiming.TimeValue`
    """
    if other == 0:
        return self
    quotient = other / self
    # int() truncates toward zero, so it already is the ceiling
    # unless a positive fractional part has been dropped
    multiplier = int(quotient)
    if multiplier < quotient:
        multiplier += 1
    return multiplier * self

# NOTE overriding so that the result
# is still an instance of TimeValue

Expand Down
44 changes: 33 additions & 11 deletions aeneas/executetask.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,8 @@ def _execute_single_level_task(self):
self.task.text_file,
sync_root=sync_root,
force_aba_auto=False,
log=True
log=True,
leaf_level=True
)
self._clear_cache_synthesizer()

Expand Down Expand Up @@ -325,11 +326,14 @@ def _execute_level(self, level, audio_file_mfcc, text_files, sync_roots, force_a
self.log([u"Text level %d, fragment %d", level, text_file_index])
self.log([u" Len: %d", len(text_file)])
sync_root = sync_roots[text_file_index]
if (level > 1) and (len(text_file) == 1) and (not sync_root.is_empty):
if (level > 1) and (len(text_file) == 1):
self.log(u"Level > 1 and only one text fragment => return trivial tree")
self._append_trivial_tree(text_file, audio_file_mfcc.audio_length, sync_root)
self._append_trivial_tree(text_file, sync_root)
elif (level > 1) and (sync_root.value.begin == sync_root.value.end):
self.log(u"Level > 1 and parent has begin == end => return trivial tree")
self._append_trivial_tree(text_file, sync_root)
else:
self.log(u"Level == 1 or more than one text fragment => compute tree")
self.log(u"Level == 1 or more than one text fragment with non-zero parent => compute tree")
if not sync_root.is_empty:
begin = sync_root.value.begin
end = sync_root.value.end
Expand All @@ -343,7 +347,8 @@ def _execute_level(self, level, audio_file_mfcc, text_files, sync_roots, force_a
text_file,
sync_root=sync_root,
force_aba_auto=force_aba_auto,
log=False
log=False,
leaf_level=(level == 3)
)
# store next level roots
next_level_text_files.extend(text_file.children_not_empty)
Expand All @@ -352,7 +357,7 @@ def _execute_level(self, level, audio_file_mfcc, text_files, sync_roots, force_a
self._clear_cache_synthesizer()
return (next_level_text_files, next_level_sync_roots)

def _execute_inner(self, audio_file_mfcc, text_file, sync_root=None, force_aba_auto=False, log=True):
def _execute_inner(self, audio_file_mfcc, text_file, sync_root=None, force_aba_auto=False, log=True, leaf_level=False):
"""
Align a subinterval of the given AudioFileMFCC
with the given TextFile.
Expand All @@ -374,6 +379,7 @@ def _execute_inner(self, audio_file_mfcc, text_file, sync_root=None, force_aba_a
:type sync_root: :class:`~aeneas.tree.Tree`
:param bool force_aba_auto: if ``True``, do not run aba algorithm
:param bool log: if ``True``, log steps
:param bool leaf_level: alert aba if the computation is at a leaf level
:rtype: :class:`~aeneas.tree.Tree`
"""
self._step_begin(u"synthesize text", log=log)
Expand All @@ -393,7 +399,7 @@ def _execute_inner(self, audio_file_mfcc, text_file, sync_root=None, force_aba_a
self._step_end(log=log)

self._step_begin(u"adjust boundaries", log=log)
self._adjust_boundaries(indices, text_file, audio_file_mfcc, sync_root, force_aba_auto)
self._adjust_boundaries(indices, text_file, audio_file_mfcc, sync_root, force_aba_auto, leaf_level)
self._step_end(log=log)

def _load_audio_file(self):
Expand Down Expand Up @@ -552,7 +558,7 @@ def _align_waves(self, real_wave_mfcc, synt_wave_mfcc, synt_anchors):
self.log(u"Computing boundary indices... done")
return boundary_indices

def _adjust_boundaries(self, boundary_indices, text_file, real_wave_mfcc, sync_root, force_aba_auto=False):
def _adjust_boundaries(self, boundary_indices, text_file, real_wave_mfcc, sync_root, force_aba_auto=False, leaf_level=False):
"""
Adjust boundaries as requested by the user.

Expand All @@ -577,18 +583,34 @@ def _adjust_boundaries(self, boundary_indices, text_file, real_wave_mfcc, sync_r
real_wave_mfcc=real_wave_mfcc,
boundary_indices=boundary_indices,
text_file=text_file,
allow_arbitrary_shift=leaf_level
)
aba.append_fragment_list_to_sync_root(sync_root=sync_root)

def _append_trivial_tree(self, text_file, end, sync_root):
def _append_trivial_tree(self, text_file, sync_root):
"""
Append trivial tree, made by HEAD, one fragment, and TAIL.
Append a trivial tree, made of one HEAD,
one sync map fragment for each element of ``text_file``,
and one TAIL.

This function is called if either ``text_file`` has only one element,
or if ``sync_root.value`` is an interval with zero length
(i.e., ``sync_root.value.begin == sync_root.value.end``).
"""
interval = sync_root.value
#
# NOTE the following is correct, but it is a bit obscure
# time_values = [interval.begin] * (1 + len(text_file)) + [interval.end] * 2
#
if len(text_file) == 1:
time_values = [interval.begin, interval.begin, interval.end, interval.end]
else:
# interval.begin == interval.end
time_values = [interval.begin] * (3 + len(text_file))
aba = AdjustBoundaryAlgorithm(rconf=self.rconf, logger=self.logger)
aba.intervals_to_fragment_list(
text_file=text_file,
time_values=[TimeValue("0.000"), interval.begin, interval.end, end],
time_values=time_values
)
aba.append_fragment_list_to_sync_root(sync_root=sync_root)

Expand Down
6 changes: 6 additions & 0 deletions aeneas/syncmap/fragmentlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -621,9 +621,12 @@ def fix_zero_length_fragments(self, duration=TimeValue("0.001"), min_index=None,
fixable = True
if fixable:
for index, move_type, move_amount in moves[::-1]:
self.log([u" Calling move_end_at with %.3f at index %d", current_time, index])
self[index].interval.move_end_at(current_time)
if move_type == "ENLARGE":
self.log([u" Calling enlarge with %.3f at index %d", move_amount, index])
self[index].interval.enlarge(move_amount)
self.log([u" Interval %d is now: %s", index, self[index].interval])
current_time = self[index].interval.begin
else:
self.log([u"Unable to fix fragment %d (%s)", i, self[i].interval])
Expand All @@ -641,6 +644,9 @@ def fix_zero_length_fragments(self, duration=TimeValue("0.001"), min_index=None,
self.log([u" Original was %.3f", original_last_end])
self.log([u" New is %.3f", self[max_index].begin])
self[max_index].begin = self[max_index - 1].end
self.log(u"Fragments after fixing:")
for i, fragment in enumerate(self):
self.log([u" %d => %.3f %.3f", i, fragment.interval.begin, fragment.interval.end])

def fix_fragment_rate(self, fragment_index, max_rate, aggressive=False):
def fix_pair(current_index, donor_index):
Expand Down
12 changes: 12 additions & 0 deletions aeneas/tests/test_exacttiming.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,18 @@ def test_repr(self):
tv1 = TimeValue("1.234")
self.assertEqual(tv1.__repr__(), "TimeValue('1.234')")

def test_geq_multiple(self):
    """
    Run ``geq_multiple`` on a 0.005 time value against a table of
    ``(other, expected)`` pairs, passing each result to ``self.check``.
    """
    step = TimeValue("0.005")
    zero = TimeValue("0.000")
    below = TimeValue("0.001")
    above = TimeValue("0.006")
    double = TimeValue("0.010")
    # (lower bound passed as ``other``, expected multiple of ``step``)
    cases = (
        (step, step),
        (zero, step),
        (below, step),
        (above, double),
        (double, double),
    )
    for bound, expected in cases:
        self.check(step.geq_multiple(bound), expected)

def test_add(self):
tv1 = TimeValue("1.100")
tv2 = TimeValue("2.200")
Expand Down
2 changes: 1 addition & 1 deletion aeneas/tests/test_syncmapfragmentlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -1001,7 +1001,7 @@ def test_time_interval_list_fix_zero_length_fragments_middle(self):
i = TimeInterval(begin=TimeValue(b), end=TimeValue(e))
s = SyncMapFragment(interval=i)
l.add(s)
l.fix_zero_length_fragments(min_index=1, max_index=(len(l) - 1))
l.fix_zero_length_fragments(duration=TimeValue("0.001"), min_index=1, max_index=(len(l) - 1))
for j, fragment in enumerate(l.fragments):
b, e = exp[j]
exp_i = TimeInterval(begin=TimeValue(b), end=TimeValue(e))
Expand Down
2 changes: 2 additions & 0 deletions docs/source/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ Changelog
v1.7.2 (2017-??-??)
-------------------

#. Fixed bug #160
#. Fixed a latent bug with arbitrary shifts in aba when using the ``task_adjust_boundary_no_zero`` option
#. Added MacOS TTS Wrapper (courtesy of Chris Vaughn)
#. Updated copyright strings with 2017

Expand Down