Anemone fix wav input

ssb22 · Apr 15, 2024 · 21c918f
1 parent 9360664
commit 21c918f
Show file tree

Hide file tree

Showing 2 changed files with 11 additions and 9 deletions.
diff --git a/README.md b/README.md
@@ -36,9 +36,9 @@ from http://ssb22.user.srcf.net/indexer/anemone.html
 
 `anemone.py` is a Python 3 script to put together a DAISY digital talking book, from HTML text, MP3 or WAV audio recordings and time index data.  It produces DAISY 2.02 files by default, or DAISY 3 (i.e. ANSI/NISO Z39.86) if an option is set.  It currently can produce one of two different types of digital talking book:
 
-1. Full audio with basic Navigation Control Centre only: this requires a list of MP3 files for the audio, one MP3 per section, and the title of each section can be placed either in a separate text file or in the filename of the MP3 file.
+1. Full audio with basic Navigation Control Centre only: this requires a list of MP3 or WAV files for the audio, one per section, and the title of each section can be placed either in a separate text file or in the filename of the audio file.
 
-2. Full audio with full text: this requires MP3 files for the audio, corresponding XHTML files for the text, and corresponding JSON files for the timing synchronisation.  Each JSON file is expected to contain a list called `"markers"` whose items contain `"id"` (or `"paragraphId"` or anything else ending id) and `"time"` (or `"startTime"` or anything else ending time), which can be in seconds, minutes:seconds or hours:minutes:seconds (fractions of a second are allowed in each case).  The IDs in these JSON files should have corresponding attributes in the XHTML, by default data-pid but this can be changed with an option.
+2. Full audio with full text: this requires MP3 or WAV files for the audio, corresponding XHTML files for the text, and corresponding JSON files for the timing synchronisation.  Each JSON file is expected to contain a list called `"markers"` whose items contain `"id"` (or `"paragraphId"` or anything else ending id) and `"time"` (or `"startTime"` or anything else ending time), which can be in seconds, minutes:seconds or hours:minutes:seconds (fractions of a second are allowed in each case).  The IDs in these JSON files should have corresponding attributes in the XHTML, by default data-pid but this can be changed with an option.
 
 All files are placed on the command line (or in parameters if you're using Anemone as a module), and Anemone assumes the correspondences are ordered.  So for example if MP3, HTML and JSON files are given, Anemone assumes the first-listed MP3 file corresponds with the first-listed HTML file and the first-listed JSON file, and so on for the second, third, etc.  With most sensible file naming schemes, you should be able to use shell wildcards like `*` when passing the files to Anemone.  You may also set the name of an output file ending `zip`; the suffix `_daisy.zip` is common.  The title, publisher, language etc of the book should be set via options: run the program with `--help` to see all.
 

diff --git a/anemone.py b/anemone.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 """
-Anemone 1.41 (http://ssb22.user.srcf.net/anemone)
+Anemone 1.42 (http://ssb22.user.srcf.net/anemone)
 (c) 2023-24 Silas S. Brown.  License: Apache 2
 Run program with --help for usage instructions.
 """
@@ -106,6 +106,7 @@ class AnemoneError(Exception): pass
 
 try: from mutagen.mp3 import MP3
 except ImportError: error("Anemone needs the Mutagen library to determine MP3 play lengths.\nPlease do: pip install mutagen")
+from mutagen.wave import WAVE
 
 class Run(): # INTERNAL
   """The parameters we need for an Anemone run.
@@ -123,7 +124,7 @@ def __init__(R,*inFiles,**kwargs):
     else: R.__dict__.update(get_argument_parser().parse_args().__dict__)
     for f in R.files:
         f = f.strip()
-        if f.endswith(f"{os.extsep}zip"):
+        if f.lower().endswith(f"{os.extsep}zip"):
             if R.outputFile: error(f"Only one {os.extsep}zip output file may be specified")
             R.outputFile = f ; continue
         if re.match("https?://",f):
@@ -136,13 +137,13 @@ def __init__(R,*inFiles,**kwargs):
         elif f.startswith('<') and f.endswith('>'):
             R.htmlData.append(f) ; continue
         elif not os.path.exists(f): error(f"File not found: {f}")
-        if f.endswith(f"{os.extsep}mp3") or f.endswith(f"{os.extsep}wav"):
+        if f.lower().endswith(f"{os.extsep}mp3") or f.lower().endswith(f"{os.extsep}wav"):
             if f.endswith(f"{os.extsep}wav") and not R.mp3_recode: error("wav input requires mp3 recode to be set")
             R.recordingFiles.append(f)
-        elif f.endswith(f"{os.extsep}json"): R.jsonData.append(json.load(open(f,encoding="utf-8")))
-        elif f.endswith(f"{os.extsep}txt"):
+        elif f.lower().endswith(f"{os.extsep}json"): R.jsonData.append(json.load(open(f,encoding="utf-8")))
+        elif f.lower().endswith(f"{os.extsep}txt"):
             R.textFiles.append(f)
-        elif f.endswith(f"{os.extsep}html") or not os.extsep in f.rsplit(os.sep,1)[-1]:
+        elif f.lower().endswith(f"{os.extsep}html") or not os.extsep in f.rsplit(os.sep,1)[-1]:
             R.htmlData.append(open(f,encoding="utf-8").read())
         else: error(f"Can't handle '{f}'")
     if not R.recordingFiles: error("Creating DAISY files without audio is not yet implemented")
@@ -313,8 +314,9 @@ def D(s): return s.replace("\n","\r\n") # in case old readers require DOS line e
     secsSoFar = 0
     durations = [] ; curP = 1
     for recNo in range(1,len(recordingTexts)+1):
-        secsThisRecording = MP3(R.recordingFiles[recNo-1]).info.length
         rTxt = recordingTexts[recNo-1]
+        f = R.recordingFiles[recNo-1]
+        secsThisRecording = (MP3(f) if f.lower().endswith(f"{os.extsep}mp3") else WAVE(f)).info.length
         durations.append(secsThisRecording)
         if R.mp3_recode: sys.stderr.write(f"Adding {recNo:04d}.mp3..."),sys.stderr.flush()
         z.writestr(f"{recNo:04d}.mp3",recordings[recNo-1].result() if R.mp3_recode else open(R.recordingFiles[recNo-1],'rb').read())