From 21c918fe6f2947dbce1ac465eac906d71ee2a55a Mon Sep 17 00:00:00 2001 From: "Silas S. Brown" Date: Mon, 15 Apr 2024 07:55:33 +0100 Subject: [PATCH] Anemone fix wav input --- README.md | 4 ++-- anemone.py | 16 +++++++++------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 1bf59bb..fc50e69 100644 --- a/README.md +++ b/README.md @@ -36,9 +36,9 @@ from http://ssb22.user.srcf.net/indexer/anemone.html `anemone.py` is a Python 3 script to put together a DAISY digital talking book, from HTML text, MP3 audio recordings and time index data. It produces DAISY 2.02 files by default, or DAISY 3 (i.e. ANSI/NISO Z39.86) if an option is set. It currently can produce one of two different types of digital talking book: -1. Full audio with basic Navigation Control Centre only: this requires a list of MP3 files for the audio, one MP3 per section, and the title of each section can be placed either in a separate text file or in the filename of the MP3 file. +1. Full audio with basic Navigation Control Centre only: this requires a list of MP3 or WAV files for the audio, one per section, and the title of each section can be placed either in a separate text file or in the filename of the audio file. -2. Full audio with full text: this requires MP3 files for the audio, corresponding XHTML files for the text, and corresponding JSON files for the timing synchronisation. Each JSON file is expected to contain a list called `"markers"` whose items contain `"id"` (or `"paragraphId"` or anything else ending id) and `"time"` (or `"startTime"` or anything else ending time), which can be in seconds, minutes:seconds or hours:minutes:seconds (fractions of a second are allowed in each case). The IDs in these JSON files should have corresponding attributes in the XHTML, by default data-pid but this can be changed with an option. +2. Full audio with full text: this requires MP3 or WAV files for the audio, corresponding XHTML files for the text, and corresponding JSON files for the timing synchronisation. Each JSON file is expected to contain a list called `"markers"` whose items contain `"id"` (or `"paragraphId"` or anything else ending id) and `"time"` (or `"startTime"` or anything else ending time), which can be in seconds, minutes:seconds or hours:minutes:seconds (fractions of a second are allowed in each case). The IDs in these JSON files should have corresponding attributes in the XHTML, by default data-pid but this can be changed with an option. All files are placed on the command line (or in parameters if you're using Anemone as a module), and Anemone assumes the correspondences are ordered. So for example if MP3, HTML and JSON files are given, Anemone assumes the first-listed MP3 file corresponds with the first-listed HTML file and the first-listed JSON file, and so on for the second, third, etc. With most sensible file naming schemes, you should be able to use shell wildcards like `*` when passing the files to Anemone. You may also set the name of an output file ending `zip`; the suffix `_daisy.zip` is common. The title, publisher, language etc of the book should be set via options: run the program with `--help` to see all. diff --git a/anemone.py b/anemone.py index 068812f..64b2cfe 100644 --- a/anemone.py +++ b/anemone.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Anemone 1.41 (http://ssb22.user.srcf.net/anemone) +Anemone 1.42 (http://ssb22.user.srcf.net/anemone) (c) 2023-24 Silas S. Brown. License: Apache 2 Run program with --help for usage instructions. """ @@ -106,6 +106,7 @@ class AnemoneError(Exception): pass try: from mutagen.mp3 import MP3 except ImportError: error("Anemone needs the Mutagen library to determine MP3 play lengths.\nPlease do: pip install mutagen") +from mutagen.wave import WAVE class Run(): # INTERNAL """The parameters we need for an Anemone run. @@ -123,7 +124,7 @@ def __init__(R,*inFiles,**kwargs): else: R.__dict__.update(get_argument_parser().parse_args().__dict__) for f in R.files: f = f.strip() - if f.endswith(f"{os.extsep}zip"): + if f.lower().endswith(f"{os.extsep}zip"): if R.outputFile: error(f"Only one {os.extsep}zip output file may be specified") R.outputFile = f ; continue if re.match("https?://",f): @@ -136,13 +137,13 @@ def __init__(R,*inFiles,**kwargs): elif f.startswith('<') and f.endswith('>'): R.htmlData.append(f) ; continue elif not os.path.exists(f): error(f"File not found: {f}") - if f.endswith(f"{os.extsep}mp3") or f.endswith(f"{os.extsep}wav"): + if f.lower().endswith(f"{os.extsep}mp3") or f.lower().endswith(f"{os.extsep}wav"): if f.endswith(f"{os.extsep}wav") and not R.mp3_recode: error("wav input requires mp3 recode to be set") R.recordingFiles.append(f) - elif f.endswith(f"{os.extsep}json"): R.jsonData.append(json.load(open(f,encoding="utf-8"))) - elif f.endswith(f"{os.extsep}txt"): + elif f.lower().endswith(f"{os.extsep}json"): R.jsonData.append(json.load(open(f,encoding="utf-8"))) + elif f.lower().endswith(f"{os.extsep}txt"): R.textFiles.append(f) - elif f.endswith(f"{os.extsep}html") or not os.extsep in f.rsplit(os.sep,1)[-1]: + elif f.lower().endswith(f"{os.extsep}html") or not os.extsep in f.rsplit(os.sep,1)[-1]: R.htmlData.append(open(f,encoding="utf-8").read()) else: error(f"Can't handle '{f}'") if not R.recordingFiles: error("Creating DAISY files without audio is not yet implemented") @@ -313,8 +314,9 @@ def D(s): return s.replace("\n","\r\n") # in case old readers require DOS line e secsSoFar = 0 durations = [] ; curP = 1 for recNo in range(1,len(recordingTexts)+1): - secsThisRecording = MP3(R.recordingFiles[recNo-1]).info.length rTxt = recordingTexts[recNo-1] + f = R.recordingFiles[recNo-1] + secsThisRecording = (MP3(f) if f.lower().endswith(f"{os.extsep}mp3") else WAVE(f)).info.length durations.append(secsThisRecording) if R.mp3_recode: sys.stderr.write(f"Adding {recNo:04d}.mp3..."),sys.stderr.flush() z.writestr(f"{recNo:04d}.mp3",recordings[recNo-1].result() if R.mp3_recode else open(R.recordingFiles[recNo-1],'rb').read())