Skip to content

Commit

Permalink
Version 0.1.2
Browse files Browse the repository at this point in the history
* Change default speed to ``default``
* Remove duplicated breaks after paragraphs
* Add metadata to resulting MP3 (title, author, track, year,
  genre)
  • Loading branch information
Yoshiki Shibukawa committed Jan 23, 2017
1 parent fe8bce0 commit 93551e8
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 17 deletions.
20 changes: 19 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ Setting
* Emphasis level of section titles
* ``"strong"``, ``"moderate"``, ``"none"``, ``"reduced"``
- * ``ssml_paragraph_speed``
* ``"medium"``
* ``"default"``
* Speech speed
* ``"x-slow"``, ``"slow"``, ``"medium"``, ``"fast"``, ``"x-fast"``, ``"default"``
- * ``ssml_break_after_paragraph``
Expand Down Expand Up @@ -108,3 +108,21 @@ Repository
-----------

https://github.com/shibukawa/sphinxcontrib-ssmlbuilder

History
-------

* 0.1.2 Jan 24 2017

* Change default speed to ``default``
* Remove duplicated breaks after paragraphs
* Add metadata to resulting MP3 (title, author, track, year, genre)

* 0.1.1 Jan 23 2017

* Add an option for skipping blocks

* 0.1 Jan 23 2017

* Initial Version

2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

setup(
name='sphinxcontrib-ssmlbuilder',
version='0.1.1',
version='0.1.2',
url='https://github.com/shibukawa/sphinxcontrib-ssmlbuilder',
download_url='http://pypi.python.org/pypi/sphinxcontrib-ssmlbuilder',
license='BSD',
Expand Down
53 changes: 46 additions & 7 deletions sphinxcontrib/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from .writer import SSMLWriter
from sphinx.util.osutil import SEP, os_path, relative_uri, ensuredir, \
movefile, copyfile
from sphinx import addnodes
import os
from os import path
import codecs
Expand All @@ -23,6 +24,8 @@
from fnmatch import fnmatch
from boto3 import Session
from botocore.exceptions import BotoCoreError, ClientError
import datetime
import re


class SSMLBuilder(Builder):
Expand All @@ -37,9 +40,8 @@ class SSMLBuilder(Builder):
ssml_break_around_section_title = [2000, 1600, 1000, 1000, 1000, 1000]
ssml_break_after_paragraph = 1000
ssml_emphasis_section_title = ['none', 'none', 'none', 'none', 'none', 'none']
ssml_paragraph_speed = 'medium'
ssml_paragraph_speed = 'default'
ssml_polly_audio_output_folder = 'polly'
ssml_polly_audio_format = 'mp3'
ssml_polly_aws_profile = ''
ssml_polly_aws_voiceid = 'Joanna'
ssml_polly_apply_docnames = ''
Expand Down Expand Up @@ -97,7 +99,7 @@ def prepare_writing(self, docnames):
self.writer = SSMLWriter(self)

def write_doc(self, docname, doctree):
destination = {"hashes": {}, "sequence": []}
destination = {"hashes": {}, "sequence": [], "title": ""}
outfilename = path.join(self.outdir, self.file_transform(docname))
ensuredir(path.dirname(outfilename))
self.writer.write(doctree, destination, docname, path.join(self.outdir, docname))
Expand All @@ -110,6 +112,23 @@ def write_doc(self, docname, doctree):
except (IOError, OSError) as err:
self.warn("error writing file %s: %s" % (outfilename, err))

def sort_docnames(self):
    """Return the document names in toctree order.

    The list starts with the configured master document; the remaining
    entries are appended by ``_sort_docnames`` as it walks the toctrees.
    """
    root = self.config.master_doc
    ordered = [root]
    self._sort_docnames(root, ordered)
    return ordered

def _sort_docnames(self, docname, traversed):
    """Append the documents reachable from *docname* to *traversed*.

    Walks every ``toctree`` node of the doctree for *docname* and, for each
    entry of its ``includefiles`` that is not yet in *traversed*, appends
    the entry and then recurses into it (depth first).

    :param docname: name of the document whose toctrees are walked
    :param traversed: list of already collected document names; it is
        extended in place and also serves as the "already seen" guard
    :returns: None
    """
    tree = self.env.get_doctree(docname)
    for toctreenode in tree.traverse(addnodes.toctree):
        for includefile in map(str, toctreenode['includefiles']):
            if includefile in traversed:
                continue
            # Record the entry before descending so that it keeps its
            # position in the order even if its own doctree cannot be walked.
            traversed.append(includefile)
            try:
                self._sort_docnames(includefile, traversed)
            except Exception as err:
                # Best effort: a failure below this entry must not abort the
                # whole ordering, but it is reported instead of being
                # silently discarded.
                self.warn("error collecting toctree entries of %s: %s"
                          % (includefile, err))

def exec_polly(self):
print("ssml_polly_aws_profile: ", self.ssml_polly_aws_profile)
print("ssml_polly_apply_docnames: ", self.config.ssml_polly_apply_docnames)
Expand Down Expand Up @@ -137,10 +156,9 @@ def exec_polly(self):
allhash.add(hashname)
hash2path.update(d["hashes"])
if fnmatch(docname, apply_docname):
targets.append({"docname": docname, "sequence": d["sequence"]})
targets.append({"docname": docname, "sequence": d["sequence"], "title": d["title"]})
for hashname in d["hashes"]:
allneededhash.add(hashname)

# read existing path
existinghash = set()
for mp3file in os.listdir(workdirpath):
Expand All @@ -152,7 +170,7 @@ def exec_polly(self):
#print("targets", targets)
#print("must_remove:", must_remove)
#print("must_convert:", must_convert)

# exec polly
session = Session(profile_name=self.ssml_polly_aws_profile)
polly = session.client("polly")
Expand All @@ -177,16 +195,37 @@ def exec_polly(self):
mp3file.write(response.get("AudioStream").read())
mp3file.close()

# metadata
album = self.config.project
year = datetime.datetime.now().year
author = ''
match = re.match(r'(\d{4}), (.*)', self.config.copyright)
if match:
year = match.group(1)
author = match.group(2)
document_order = self.sort_docnames()

# concat mp3 fragments
task = 1
for target in targets:
docname = target['docname']
title = target['title']
track = document_order.index(docname) + 1

print(f"concatinating MP3 fragments: {docname}.mp3 ({task}/{len(targets)})")
task+=1
sources = [hashkey + '.mp3' for hashkey in target['sequence']]
outfilename = path.join(outputpath, f"{docname}.mp3")
ensuredir(path.dirname(outfilename))
args = ['ffmpeg', "-i", "concat:" + "|".join(sources), "-c", "copy", outfilename]

args = ['ffmpeg', "-y", "-i", "concat:" + "|".join(sources), "-c", "copy",
'-metadata', f'album="{album}"',
'-metadata', f'author="{author}"',
'-metadata', f'title="{title}"',
'-metadata', f'track="{track}"',
'-metadata', 'genre="Audio Book"',
'-metadata', f'year="{year}"',
outfilename]
#print(args, workdirpath)
p = subprocess.Popen(args, shell=False, cwd=workdirpath)
p.wait()
Expand Down
2 changes: 1 addition & 1 deletion sphinxcontrib/ssmlbuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def setup(app):
app.add_config_value('ssml_break_around_section_title', [2000, 1600, 1000, 1000, 1000, 1000], True)
app.add_config_value('ssml_emphasis_section_title', ['none', 'none', 'none', 'none', 'none', 'none'], True)
app.add_config_value('ssml_skip_block', {'comment': True, 'table': True, 'codeblock': True}, True)
app.add_config_value('ssml_paragraph_speed', 'medium', True)
app.add_config_value('ssml_paragraph_speed', 'default', True)
app.add_config_value('ssml_break_after_paragraph', 1000, True)
app.add_config_value('ssml_polly_audio_output_folder', "polly", True)
app.add_config_value('ssml_polly_aws_profile', "", False)
Expand Down
17 changes: 10 additions & 7 deletions sphinxcontrib/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,13 @@ def __init__(self, document, builder, destination, docname, basepath):
self.basepath = basepath
self.state = ['regular']

def active(self):
    """Tell whether output is enabled for the innermost block state.

    Looks up the last entry of ``self.state`` in the builder's
    ``ssml_skip_block`` mapping; a state that is missing from the mapping
    is not skipped.

    :returns: ``False`` when the current state is configured to be
        skipped, ``True`` otherwise
    """
    current = self.state[-1]
    skipped = self.builder.ssml_skip_block.get(current, False)
    return not skipped

def add_text(self, text):
# type: (unicode) -> None
laststate = self.state[-1]
if self.builder.ssml_skip_block.get(laststate, False):
# this code block is configured to be skip
pass
else:
if self.active():
self.contents.append([len(text), REGULAR, escape(text)])

def reset_content(self):
Expand Down Expand Up @@ -107,7 +107,7 @@ def next_node_character_count(index):
middle += "-" + str(i + 1)
filepath = self.basepath + middle + ".ssml"
filename = self.docname + middle + ".ssml"
sha = hashlib.sha256(output.encode('utf-8')).hexdigest()
sha = hashlib.sha1(output.encode('utf-8')).hexdigest()
self.destination["hashes"][sha] = filename
self.destination["sequence"].append(sha)
f = open(filepath, "w")
Expand Down Expand Up @@ -168,6 +168,8 @@ def depart_glossary(self, node):
pass

def visit_title(self, node):
if not self.destination['title']:
self.destination['title'] = node.astext()
level = self.sectionlevel-1
breaklength = self.builder.ssml_break_around_section_title[level]
emphasis = self.builder.ssml_emphasis_section_title[level]
Expand Down Expand Up @@ -389,7 +391,8 @@ def depart_compact_paragraph(self, node):

def visit_paragraph(self, node):
# type: (nodes.Node) -> None
self.contents.append([0, JOIN_BEFORE, '<break time="%dms" />' % self.builder.ssml_break_after_paragraph])
if self.active():
self.contents.append([0, JOIN_BEFORE, '<break time="%dms" />' % self.builder.ssml_break_after_paragraph])
info("visit", node)

def depart_paragraph(self, node):
Expand Down

0 comments on commit 93551e8

Please sign in to comment.