Skip to content

Commit

Permalink
Anemone fix wav input
Browse files Browse the repository at this point in the history
  • Loading branch information
ssb22 committed Apr 15, 2024
1 parent 9360664 commit 21c918f
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 9 deletions.
4 changes: 2 additions & 2 deletions README.md
Expand Up @@ -36,9 +36,9 @@ from http://ssb22.user.srcf.net/indexer/anemone.html

`anemone.py` is a Python 3 script to put together a DAISY digital talking book, from HTML text, MP3 audio recordings and time index data. It produces DAISY 2.02 files by default, or DAISY 3 (i.e. ANSI/NISO Z39.86) if an option is set. It currently can produce one of two different types of digital talking book:

1. Full audio with basic Navigation Control Centre only: this requires a list of MP3 files for the audio, one MP3 per section, and the title of each section can be placed either in a separate text file or in the filename of the MP3 file.
1. Full audio with basic Navigation Control Centre only: this requires a list of MP3 or WAV files for the audio, one per section, and the title of each section can be placed either in a separate text file or in the filename of the audio file.

2. Full audio with full text: this requires MP3 files for the audio, corresponding XHTML files for the text, and corresponding JSON files for the timing synchronisation. Each JSON file is expected to contain a list called `"markers"` whose items contain `"id"` (or `"paragraphId"` or anything else ending id) and `"time"` (or `"startTime"` or anything else ending time), which can be in seconds, minutes:seconds or hours:minutes:seconds (fractions of a second are allowed in each case). The IDs in these JSON files should have corresponding attributes in the XHTML, by default data-pid but this can be changed with an option.
2. Full audio with full text: this requires MP3 or WAV files for the audio, corresponding XHTML files for the text, and corresponding JSON files for the timing synchronisation. Each JSON file is expected to contain a list called `"markers"` whose items contain `"id"` (or `"paragraphId"` or anything else ending in "id") and `"time"` (or `"startTime"` or anything else ending in "time"), where the time can be given in seconds, minutes:seconds or hours:minutes:seconds (fractions of a second are allowed in each case). The IDs in these JSON files should have corresponding attributes in the XHTML: by default `data-pid`, but this can be changed with an option.

All files are placed on the command line (or in parameters if you're using Anemone as a module), and Anemone assumes the correspondences are ordered. So, for example, if MP3, HTML and JSON files are given, Anemone assumes the first-listed MP3 file corresponds with the first-listed HTML file and the first-listed JSON file, and so on for the second, third, etc. With most sensible file naming schemes, you should be able to use shell wildcards like `*` when passing the files to Anemone. You may also set the name of an output file ending in `zip`; the suffix `_daisy.zip` is common. The title, publisher, language, etc. of the book should be set via options: run the program with `--help` to see all.

Expand Down
16 changes: 9 additions & 7 deletions anemone.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3
"""
Anemone 1.41 (http://ssb22.user.srcf.net/anemone)
Anemone 1.42 (http://ssb22.user.srcf.net/anemone)
(c) 2023-24 Silas S. Brown. License: Apache 2
Run program with --help for usage instructions.
"""
Expand Down Expand Up @@ -106,6 +106,7 @@ class AnemoneError(Exception): pass

try: from mutagen.mp3 import MP3
except ImportError: error("Anemone needs the Mutagen library to determine MP3 play lengths.\nPlease do: pip install mutagen")
from mutagen.wave import WAVE

class Run(): # INTERNAL
"""The parameters we need for an Anemone run.
Expand All @@ -123,7 +124,7 @@ def __init__(R,*inFiles,**kwargs):
else: R.__dict__.update(get_argument_parser().parse_args().__dict__)
for f in R.files:
f = f.strip()
if f.endswith(f"{os.extsep}zip"):
if f.lower().endswith(f"{os.extsep}zip"):
if R.outputFile: error(f"Only one {os.extsep}zip output file may be specified")
R.outputFile = f ; continue
if re.match("https?://",f):
Expand All @@ -136,13 +137,13 @@ def __init__(R,*inFiles,**kwargs):
elif f.startswith('<') and f.endswith('>'):
R.htmlData.append(f) ; continue
elif not os.path.exists(f): error(f"File not found: {f}")
if f.endswith(f"{os.extsep}mp3") or f.endswith(f"{os.extsep}wav"):
if f.lower().endswith(f"{os.extsep}mp3") or f.lower().endswith(f"{os.extsep}wav"):
if f.endswith(f"{os.extsep}wav") and not R.mp3_recode: error("wav input requires mp3 recode to be set")
R.recordingFiles.append(f)
elif f.endswith(f"{os.extsep}json"): R.jsonData.append(json.load(open(f,encoding="utf-8")))
elif f.endswith(f"{os.extsep}txt"):
elif f.lower().endswith(f"{os.extsep}json"): R.jsonData.append(json.load(open(f,encoding="utf-8")))
elif f.lower().endswith(f"{os.extsep}txt"):
R.textFiles.append(f)
elif f.endswith(f"{os.extsep}html") or not os.extsep in f.rsplit(os.sep,1)[-1]:
elif f.lower().endswith(f"{os.extsep}html") or not os.extsep in f.rsplit(os.sep,1)[-1]:
R.htmlData.append(open(f,encoding="utf-8").read())
else: error(f"Can't handle '{f}'")
if not R.recordingFiles: error("Creating DAISY files without audio is not yet implemented")
Expand Down Expand Up @@ -313,8 +314,9 @@ def D(s): return s.replace("\n","\r\n") # in case old readers require DOS line e
secsSoFar = 0
durations = [] ; curP = 1
for recNo in range(1,len(recordingTexts)+1):
secsThisRecording = MP3(R.recordingFiles[recNo-1]).info.length
rTxt = recordingTexts[recNo-1]
f = R.recordingFiles[recNo-1]
secsThisRecording = (MP3(f) if f.lower().endswith(f"{os.extsep}mp3") else WAVE(f)).info.length
durations.append(secsThisRecording)
if R.mp3_recode: sys.stderr.write(f"Adding {recNo:04d}.mp3..."),sys.stderr.flush()
z.writestr(f"{recNo:04d}.mp3",recordings[recNo-1].result() if R.mp3_recode else open(R.recordingFiles[recNo-1],'rb').read())
Expand Down

0 comments on commit 21c918f

Please sign in to comment.