-
Notifications
You must be signed in to change notification settings - Fork 10
/
aws-polly.el
167 lines (142 loc) · 7.15 KB
/
aws-polly.el
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
;;; aws-polly.el --- Speak text with AWS Polly
;;; Commentary:
;;;
;;; A simple interface for AWS’ text-to-speech API.  All text is sent to Amazon’s servers, so keep
;;; privacy implications in mind before using this.
;;; Code:
(require 'emms)
(require 'guess-language)
(require 'pandoc-mode-utils)
;; Customization group collecting every user option of this package;
;; it is filed under the standard `tools' group.
(defgroup aws-polly
  nil
  "Text to speech by running AWS Polly."
  :group 'tools)
;; AWS offers various voices to choose from.
;; Each entry is (VOICE-ID LANGUAGE-CODE).  The language code is compared
;; by prefix against the detected language, so it must not carry any
;; surrounding whitespace.
(defcustom aws-polly-voices
  '(("Amy" "en-GB") ("Brian" "en-GB") ("Joanna" "en-US") ("Mizuki" "ja-JP")
    ("Filiz" "tr-TR") ("Astrid" "sv-SE") ("Maxim" "ru-RU") ("Tatyana" "ru-RU")
    ("Carmen" "ro-RO") ("Ines" "pt-PT") ("Cristiano" "pt-PT") ("Vitoria" "pt-BR")
    ("Ricardo" "pt-BR") ("Maja" "pl-PL") ("Jan" "pl-PL") ("Ewa" "pl-PL")
    ("Ruben" "nl-NL") ("Lotte" "nl-NL") ("Liv" "nb-NO") ("Giorgio" "it-IT")
    ("Carla" "it-IT") ("Karl" "is-IS") ("Dora" "is-IS") ("Mathieu" "fr-FR")
    ("Celine" "fr-FR") ("Chantal" "fr-CA") ("Penelope" "es-US") ("Miguel" "es-US")
    ("Enrique" "es-ES") ("Conchita" "es-ES") ("Geraint" "en-GB-WLS") ("Salli" "en-US")
    ("Kimberly" "en-US") ("Kendra" "en-US") ("Justin" "en-US") ("Joey" "en-US")
    ("Ivy" "en-US") ("Raveena" "en-IN") ("Emma" "en-GB") ("Russell" "en-AU")
    ("Nicole" "en-AU") ("Marlene" "de-DE") ("Hans" "de-DE") ("Naja" "da-DK")
    ("Mads" "da-DK") ("Gwyneth" "cy-GB") ("Jacek" "pl-PL"))
  "Voices to use in AWS polly in order of preference.
Each element has the form (VOICE-ID LANGUAGE-CODE), both strings.
Earlier entries win when several voices match a language, and the
very first entry is the fallback when no voice matches at all."
  :group 'aws-polly
  ;; An element (NAME LANG) is the cons (NAME . (LANG)), hence the value
  ;; part is a one-element list, which the `group' widget describes.
  :type '(alist :key-type string :value-type (group string)))
;; Synthesis is delegated to the AWS command line client, which can be
;; obtained by =pip install awscli=.
(defcustom aws-polly-command
  "aws polly synthesize-speech --output-format mp3 --voice-id %s --text \"%s\" %s"
  "Shell command template used to run AWS polly.
The template is handed to `format' with three arguments, in this
order: the voice id, the text to speak and the output file name."
  :type 'string
  :group 'aws-polly)
(defun aws-polly-is-quote (text)
  "Decide if TEXT is a quote.
TEXT counts as a quote when it begins with two spaces.  Return t in
that case and nil otherwise, including when TEXT is shorter than two
characters or is not a string."
  ;; The previous test compared a two-character substring against a
  ;; one-character string and therefore could never succeed.
  ;; NOTE(review): the two-space marker presumably mirrors how pandoc's
  ;; plain writer indents block quotes -- confirm against real output.
  (and (stringp text)
       (string-prefix-p "  " text)))
;; Return the first voice matching detected language.
(defun aws-polly-select-voice (text)
  "Select a voice from `aws-polly-voices' for the language detected in TEXT.
The language is looked up by passing the result of
`guess-language-buffer' through `guess-language-langcodes'; underscores
in the resulting code are turned into hyphens and the code is matched
as a prefix of each voice's language code.

Return the first matching voice, or the second matching voice when TEXT
is a quote according to `aws-polly-is-quote' and at least two voices
match.  When no language code could be determined or no voice matches,
return the first voice of `aws-polly-voices'."
  (let* ((code (with-temp-buffer
                 (insert text)
                 (cadr (assq (guess-language-buffer) guess-language-langcodes))))
         ;; The lookup yields nil when the guessed language has no entry;
         ;; guard so that we fall back to the default voice instead of
         ;; signalling a wrong-type-argument error.
         (lang (and (stringp code)
                    (replace-regexp-in-string "_" "-" code)))
         (voices nil))
    (when lang
      (dolist (entry aws-polly-voices)
        (when (and (stringp (cadr entry))
                   (string-prefix-p lang (cadr entry)))
          (push (car entry) voices)))
      ;; `push' collected the matches back to front; restore preference order.
      (setq voices (nreverse voices)))
    (cond
     ((null voices) (caar aws-polly-voices))
     ;; Quotes are read by the runner-up voice so they stand out.
     ((and (cdr voices) (aws-polly-is-quote text)) (cadr voices))
     (t (car voices)))))
(defun aws-polly-voices-completing-read ()
  "Prompt for one of the voices in `aws-polly-voices' and return its name.
Candidates are shown as \"NAME (LANGUAGE)\"; only the part before the
first space of the selected candidate is returned."
  (let* ((candidates (mapcar (lambda (voice)
                               (format "%s (%s)" (car voice) (cadr voice)))
                             aws-polly-voices))
         (choice (completing-read "Voice: " candidates nil t)))
    ;; Cut the selection at its first space, dropping the language suffix.
    (replace-regexp-in-string " .*$" "" choice)))
;; As of writing, AWS Polly will not read any text longer than 1500
;; characters; longer chunks are rejected before any request is made.
(defvar aws-polly-character-limit
  1500
  "Maximum number of characters AWS polly accepts in one piece of text.")
(defun aws-polly-pandoc-convert (writer &optional buffer beginning end reader)
  "Output WRITER formatted string of BUFFER between BEGINNING and END parsed using READER.
WRITER and READER are pandoc format names passed to \"--write\" and
\"--read\".  BUFFER defaults to the current buffer, BEGINNING and END
default to the accessible portion of BUFFER, and READER defaults to
the entry for BUFFER's major mode in `pandoc-major-modes'.

The buffer named by `pandoc--output-buffer-name' is erased and then
receives pandoc's output; its contents are returned as a string.
Signal an error when no reader is given and none is known for the
major mode of BUFFER."
  (let ((source (or buffer (current-buffer)))
        (pandoc-buffer (get-buffer-create pandoc--output-buffer-name)))
    (with-current-buffer pandoc-buffer
      (erase-buffer))
    (with-current-buffer source
      ;; Resolve the defaults inside the source buffer so that positions
      ;; and major mode refer to the text actually being converted.
      (let ((from (or beginning (point-min)))
            (to (or end (point-max)))
            (input-format (or reader
                              (cdr (assq major-mode pandoc-major-modes)))))
        (unless input-format
          (error "No pandoc reader known for major mode `%s'" major-mode))
        ;; Arguments after the program: keep the region (DELETE nil),
        ;; send output to PANDOC-BUFFER, and redisplay as output arrives.
        (call-process-region from to "pandoc" nil pandoc-buffer t
                             "--read" input-format
                             "--write" writer
                             "--quiet"
                             "--wrap=none")))
    (with-current-buffer pandoc-buffer
      (buffer-string))))
(defun aws-polly-plaintextify (beginning end)
  "Return the region between BEGINNING and END as text suitable for speaking.
When `pandoc-major-modes' has an entry for the current major mode, the
region is converted with pandoc's \"plain\" writer and every underscore
is removed from the result.  This kills links etc. which we typically
do not want read out.  Without such an entry the region's text is
returned unchanged."
  (let ((pandoc-use-async nil)
        (input-format (cdr (assq major-mode pandoc-major-modes))))
    (if (null input-format)
        (buffer-substring beginning end)
      (let ((plain (aws-polly-pandoc-convert
                    "plain" (current-buffer) beginning end input-format)))
        (replace-regexp-in-string "_" "" plain)))))
;; A short stretch of silence can be inserted between paragraphs.
(defcustom aws-polly-make-silence-command
  "ffmpeg -f lavfi -y -i anullsrc=r=22050:cl=mono -t %f -q:a 9 -acodec libmp3lame %s"
  "Shell command template used to make moments of silence.
The template is handed to `format' with two arguments, in this order:
the duration in seconds (consumed by %f) and the output file name."
  :type 'string
  :group 'aws-polly)
(defun aws-polly-text-postprocess (text)
  "Return TEXT cleaned up for passing to polly proper.
A leading \"> \" is removed from every line first; afterwards a leading
\"- \" is removed from every line of the result."
  (let* ((without-quote-marks
          (replace-regexp-in-string "^> \\(.*\\)" "\\1" text))
         (without-bullets
          (replace-regexp-in-string "^- \\(.*\\)" "\\1" without-quote-marks)))
    without-bullets))
(defun aws-polly-make-silence (length output-filename)
  "Write LENGTH seconds of silence to OUTPUT-FILENAME.
LENGTH is a number of seconds and OUTPUT-FILENAME the file to create.
The command is built from `aws-polly-make-silence-command' and run
synchronously through the shell; the value of
`call-process-shell-command' is returned."
  (call-process-shell-command
   (format aws-polly-make-silence-command
           length
           ;; Expand first so a leading \"~\" still resolves, then quote so
           ;; that spaces or shell metacharacters in the path (for example
           ;; in the temporary directory) cannot split or alter the command.
           (shell-quote-argument (expand-file-name output-filename)))
   nil nil nil))
(defun aws-polly--escape-for-double-quotes (text)
  "Return TEXT escaped for use inside a double-quoted POSIX shell word.
A backslash is put in front of every backslash, double quote, dollar
sign and backquote.  This matches the default `aws-polly-command',
which wraps the text in double quotes."
  (replace-regexp-in-string "[\\\"$`]"
                            (lambda (match) (concat "\\" match))
                            text t t))

;;;###autoload
(defun aws-polly-region (arg)
  "Speak text with AWS polly.
When no region is active the current paragraph is used.  When ARG (the
prefix argument) is given, ask for the voice first; otherwise a voice
is picked per chunk by `aws-polly-select-voice'.

The text is converted by `aws-polly-plaintextify' and split at blank
lines.  Each non-blank chunk is synthesized into its own temporary mp3
file using `aws-polly-command'; the files, each followed by one second
of silence, are then added with `emms-add-file' and playback is
started with `emms-start'.

Signal an error when there is nothing to speak, when a chunk is longer
than `aws-polly-character-limit', or when the synthesis command exits
with a non-zero status."
  (interactive "P")
  (let* ((region-p (use-region-p))
         (beginning (if region-p
                        (region-beginning)
                      (save-excursion (backward-paragraph) (point))))
         (end (if region-p
                  (region-end)
                (save-excursion (forward-paragraph) (point))))
         ;; Drop chunks holding only whitespace: there is nothing to
         ;; synthesize for them.  Chunks are deliberately not trimmed
         ;; because leading spaces are what marks a quote.
         (texts (delq nil
                      (mapcar (lambda (chunk)
                                (unless (string-match-p "\\`[ \t\r\n]*\\'" chunk)
                                  chunk))
                              (split-string (aws-polly-plaintextify beginning end)
                                            "\n\n"))))
         ;; An empty answer from the prompt means no explicit choice.
         (set-voice (and arg
                         (let ((choice (aws-polly-voices-completing-read)))
                           (unless (equal choice "") choice))))
         (silence nil)
         (files nil))
    (unless texts
      (user-error "No text to speak"))
    ;; Validate every chunk before creating files or calling the service.
    (dolist (text texts)
      (when (> (length text) aws-polly-character-limit)
        (error "AWS polly will only accept up to %d characters but got %d \"%s\""
               aws-polly-character-limit
               (length text)
               (substring text 0 (min 32 (length text))))))
    (setq silence (make-temp-file "emacs-aws-polly-silence" nil ".mp3"))
    (aws-polly-make-silence 1.0 silence)
    (dolist (text texts)
      (let* ((voice (or set-voice
                        (aws-polly-select-voice text)
                        (error "No AWS polly voice available")))
             (output-filename (make-temp-file "emacs-aws-polly" nil ".mp3"))
             (status (call-process-shell-command
                      (format aws-polly-command
                              (shell-quote-argument voice)
                              ;; The text lands inside double quotes in the
                              ;; command template, so escape what the shell
                              ;; would still interpret there.
                              (aws-polly--escape-for-double-quotes
                               (aws-polly-text-postprocess text))
                              (shell-quote-argument output-filename))
                      nil nil nil)))
        (unless (eq status 0)
          ;; Do not queue the empty placeholder created by `make-temp-file'.
          (ignore-errors (delete-file output-filename))
          (error "AWS polly command failed (%s) for text starting \"%s\""
                 status
                 (substring text 0 (min 32 (length text)))))
        ;; Collected in reverse; put back in reading order below.
        (push output-filename files)
        (push silence files)))
    (dolist (file (nreverse files))
      ;; Keep our own prefix argument from leaking into the EMMS command.
      (let ((current-prefix-arg nil))
        (emms-add-file file)))
    (emms-start)))
(provide 'aws-polly)
;;; aws-polly.el ends here