tgstation · Watermelon914 · Dec 27, 2022 · Dec 27, 2022 · Dec 27, 2022 · Dec 28, 2022
diff --git a/code/__DEFINES/subsystems.dm b/code/__DEFINES/subsystems.dm
@@ -160,6 +160,7 @@
 #define INIT_ORDER_ECONOMY 40
 #define INIT_ORDER_OUTPUTS 35
 #define INIT_ORDER_RESTAURANT 34
+#define INIT_ORDER_TTS 31 // Needs to be before SSAtoms init
 #define INIT_ORDER_ATOMS 30
 #define INIT_ORDER_LANGUAGE 25
 #define INIT_ORDER_MACHINES 20

diff --git a/code/controllers/configuration/entries/game_options.dm b/code/controllers/configuration/entries/game_options.dm
@@ -412,3 +412,5 @@
 
 /datum/config_entry/flag/disallow_circuit_sounds
 
+/// Base URL of the HTTP text-to-speech server. The TTS subsystem stays disabled when this is unset.
+/datum/config_entry/string/tts_http_url
+	protection = CONFIG_ENTRY_LOCKED
@@ -0,0 +1,151 @@
+/*!
+ * Copyright (c) 2020 Aleksej Komarov
+ * SPDX-License-Identifier: MIT
+ */
+
+/// Returns `text` with every character that is not a letter, digit, space or one of , ? . ! ' & replaced by a space.
+/proc/tts_filter(text)
+	// Whitelist: alphanumerics, spaces and a little basic punctuation; anything else becomes a space
+	var/static/regex/bad_chars_regex = regex("\[^a-zA-Z0-9 ,?.!'&]", "g")
+	return bad_chars_regex.Replace(text, " ")
+
+
+// Positions inside each message entry, which is built as list(target, identifier, timeout, request)
+#define TARGET_INDEX 1
+#define IDENTIFIER_INDEX 2
+#define TIMEOUT_INDEX 3
+#define REQUEST_INDEX 4
+
+/// Sends speech to an external HTTP text-to-speech server and plays back the returned audio.
+SUBSYSTEM_DEF(tts)
+	name = "Text To Speech"
+	wait = 0.1 SECONDS
+	init_order = INIT_ORDER_TTS
+
+	/// Queued HTTP requests that have yet to be sent
+	var/list/queued_tts_messages = list()
+
+	/// Queued HTTP requests that have yet to be sent. Takes priority over queued_tts_messages
+	var/list/priority_queued_tts_messages = list()
+
+	/// HTTP requests currently in progress but not being processed yet
+	var/list/in_process_tts_messages = list()
+
+	/// HTTP requests that are being processed to see if they've been finished
+	var/list/current_processing_tts_messages = list()
+
+	/// A list of available speakers, as returned by the server's /tts-voices endpoint
+	var/list/available_speakers = list()
+
+	/// Whether TTS is enabled or not
+	var/tts_enabled = FALSE
+
+	/// How long after being queued a message's request may stay unfinished before it is dropped
+	var/message_timeout = 5 SECONDS
+
+	/// Maximum number of requests kept in in_process_tts_messages at the same time
+	var/max_processing_at_once = 15
+
+/datum/controller/subsystem/tts/vv_edit_var(var_name, var_value)
+	// Every var except tts_enabled is editable as usual
+	if(var_name != NAMEOF(src, tts_enabled))
+		return ..()
+	// tts_enabled is only ever set by Initialize(), once the server has answered
+	return FALSE
+
+/**
+ * Fetches the list of speakers from the configured TTS server and enables TTS if that succeeds.
+ *
+ * Returns SS_INIT_NO_NEED when no server is configured, the request fails, or the server
+ * offers no usable speakers; SS_INIT_SUCCESS otherwise.
+ */
+/datum/controller/subsystem/tts/Initialize()
+	var/tts_url = CONFIG_GET(string/tts_http_url)
+	if(!tts_url)
+		return SS_INIT_NO_NEED
+
+	// Initialization waits here until the server has answered
+	var/datum/http_request/request = new()
+	request.prepare(RUSTG_HTTP_METHOD_GET, "[tts_url]/tts-voices", "", "")
+	request.begin_async()
+	UNTIL(request.is_complete())
+	var/datum/http_response/response = request.into_response()
+	if(response.errored || response.status_code != 200)
+		return SS_INIT_NO_NEED
+
+	var/list/speakers = json_decode(response.body)
+	// Never enable TTS on a malformed reply: movables pick() their voice from this list
+	if(!islist(speakers))
+		return SS_INIT_NO_NEED
+	speakers -= "ED\n" // TODO: properly fix this
+	// An empty list would leave nothing to pick() from
+	if(!length(speakers))
+		return SS_INIT_NO_NEED
+	available_speakers = speakers
+	tts_enabled = TRUE
+
+	return SS_INIT_SUCCESS
+
+/**
+ * Starts queued requests while there is spare capacity, then polls the in-flight requests and
+ * plays the sound of every one that has finished.
+ *
+ * Arguments:
+ * * resumed - when falsy, queued requests are started and the polling list is rebuilt before polling
+ */
+/datum/controller/subsystem/tts/fire(resumed)
+	if(!tts_enabled)
+		// NOTE(review): presumably SS_NO_FIRE stops this subsystem from being fired again - confirm
+		flags |= SS_NO_FIRE
+		return
+
+	if(!resumed)
+		// Entries are taken from the end of each list, so the most recently queued message is started first
+		var/list/priority_list = priority_queued_tts_messages
+		while(length(in_process_tts_messages) < max_processing_at_once && priority_list.len > 0)
+			var/list/entry = priority_list[priority_list.len]
+			priority_list.len--
+			var/datum/http_request/request = entry[REQUEST_INDEX]
+			request.begin_async()
+			// Wrapped in list() so the entry is appended as a single element
+			in_process_tts_messages += list(entry)
+		// The normal queue only gets whatever capacity the priority queue left over
+		var/list/less_priority_list = queued_tts_messages
+		while(length(in_process_tts_messages) < max_processing_at_once && less_priority_list.len > 0)
+			var/list/entry = less_priority_list[less_priority_list.len]
+			less_priority_list.len--
+			var/datum/http_request/request = entry[REQUEST_INDEX]
+			request.begin_async()
+			in_process_tts_messages += list(entry)
+		// Poll a copy, so entries can be removed from in_process_tts_messages while iterating
+		current_processing_tts_messages = in_process_tts_messages.Copy()
+
+	// For speed
+	var/list/processing_messages = current_processing_tts_messages
+	while(processing_messages.len)
+		var/current_message = processing_messages[processing_messages.len]
+		processing_messages.len--
+		var/atom/movable/target = current_message[TARGET_INDEX]
+		// The speaker is gone, so stop tracking its request
+		// NOTE(review): no fdel() on this path nor on the timeout/error paths below; the tmp file may be left behind if the request still writes it - confirm
+		if(QDELETED(target))
+			in_process_tts_messages -= list(current_message)
+			continue
+
+		var/datum/http_request/request = current_message[REQUEST_INDEX]
+		if(!request.is_complete())
+			// Still running: drop it once its deadline has passed, otherwise check again on a later fire
+			if(current_message[TIMEOUT_INDEX] < world.time)
+				in_process_tts_messages -= list(current_message)
+			continue
+
+		var/datum/http_response/response = request.into_response()
+		in_process_tts_messages -= list(current_message)
+		if(response.errored)
+			continue
+		// The request was prepared with this path as its output file; play it, then delete it
+		var/sound/new_sound = new("tmp/[current_message[IDENTIFIER_INDEX]].ogg")
+		playsound(current_message[TARGET_INDEX], new_sound, 100, ignore_walls = FALSE)
+		fdel(file("tmp/[current_message[IDENTIFIER_INDEX]].ogg"))
+		// NOTE(review): only reached after a sound was played; the continue paths above skip this check
+		if(MC_TICK_CHECK)
+			return
+
+/**
+ * Builds the HTTP request that synthesises `message` and either starts it right away or queues it.
+ *
+ * Does nothing when TTS is disabled or the message contains no alphanumeric character.
+ * Crashes (stack trace) when `speaker` is not one of available_speakers.
+ *
+ * Arguments:
+ * * target - the atom the resulting sound is played from
+ * * message - text to speak; sanitised with tts_filter() and cut to 99 characters
+ * * speaker - voice name, must be in available_speakers
+ * * filter - audio filter string forwarded to the server as the `filter` query parameter
+ */
+/datum/controller/subsystem/tts/proc/queue_tts_message(target, message, speaker, filter)
+	if(!tts_enabled)
+		return
+
+	// Deliberately NOT a global ("g") pattern: this regex is static, and a global regex resumes
+	// Find() after its previous match when handed the same text again, so a message repeated
+	// back to back would eventually report "no match" and be dropped.
+	var/static/regex/contains_alphanumeric = regex("\[a-zA-Z0-9]")
+	// If there is no alphanumeric char, the output will usually be static, so
+	// don't bother sending
+	if(!contains_alphanumeric.Find(message))
+		return
+
+	if(!(speaker in available_speakers))
+		CRASH("Tried to use invalid speaker for TTS message! ([speaker])")
+
+	var/shell_scrubbed_input = tts_filter(message)
+	shell_scrubbed_input = copytext(shell_scrubbed_input, 1, 100)
+	// NOTE(review): identical speaker/text/filter requests share one identifier and therefore one tmp file
+	var/identifier = md5(speaker + shell_scrubbed_input + filter)
+	speaker = tts_filter(speaker)
+
+	var/list/headers = list()
+	headers["Content-Type"] = "application/json"
+	var/datum/http_request/request = new()
+	var/file_name = "tmp/[identifier].ogg"
+	// tts_filter() lets spaces, '&' and '?' through, so the speaker has to be URL-encoded just like the filter
+	request.prepare(RUSTG_HTTP_METHOD_GET, "[CONFIG_GET(string/tts_http_url)]/tts?voice=[url_encode(speaker)]&identifier=[identifier]&filter=[url_encode(filter)]", json_encode(list("text" = shell_scrubbed_input)), headers, file_name)
+	var/list/waiting_list = queued_tts_messages
+	if(length(in_process_tts_messages) < max_processing_at_once)
+		// Spare capacity: skip the queues and start the request immediately
+		request.begin_async()
+		waiting_list = in_process_tts_messages
+	else if(ismob(target))
+		// Mobs with a client attached go to the priority queue
+		var/mob/target_mob = target
+		if(target_mob.client != null)
+			waiting_list = priority_queued_tts_messages
+
+	waiting_list += list(list(target, identifier, world.time + message_timeout, request))
+
+#undef TARGET_INDEX
+#undef IDENTIFIER_INDEX
+#undef TIMEOUT_INDEX
+#undef REQUEST_INDEX
diff --git a/code/game/atoms_movable.dm b/code/game/atoms_movable.dm
@@ -94,6 +94,11 @@
 	/// The degree of pressure protection that mobs in list/contents have from the external environment, between 0 and 1
 	var/contents_pressure_protection = 0
 
+	/// The voice that this movable makes when speaking
+	var/voice
+
+	/// Audio filter string sent along with this movable's TTS requests
+	var/voice_filter = ""
+
 /mutable_appearance/emissive_blocker
 
 /mutable_appearance/emissive_blocker/New()
@@ -104,6 +109,10 @@
 
 /atom/movable/Initialize(mapload)
 	. = ..()
+
+	if(!voice && SStts.tts_enabled)
+		voice = pick(SStts.available_speakers)
+
 	switch(blocks_emissive)
 		if(EMISSIVE_BLOCK_GENERIC)
 			var/static/mutable_appearance/emissive_blocker/blocker = new()

diff --git a/code/game/say.dm b/code/game/say.dm
@@ -75,11 +75,16 @@ GLOBAL_LIST_INIT(freqtospan, list(
 	return TRUE
 
 /atom/movable/proc/send_speech(message, range = 7, obj/source = src, bubble_type, list/spans, datum/language/message_language, list/message_mods = list(), forced = FALSE)
+	// Set once any hearer turns out to be in GLOB.player_list
+	var/found_client = FALSE
 	for(var/atom/movable/hearing_movable as anything in get_hearers_in_view(range, source))
 		if(!hearing_movable)//theoretically this should use as anything because it shouldnt be able to get nulls but there are reports that it does.
 			stack_trace("somehow theres a null returned from get_hearers_in_view() in send_speech!")
 			continue
 		hearing_movable.Hear(null, src, message_language, message, null, spans, message_mods, range)
+		// Stop doing the list lookup after the first hit
+		if(!found_client && (hearing_movable in GLOB.player_list))
+			found_client = TRUE
+	// Only queue TTS when this movable has a voice and a hearer from GLOB.player_list was found
+	if(src.voice && found_client)
+		INVOKE_ASYNC(SStts, TYPE_PROC_REF(/datum/controller/subsystem/tts, queue_tts_message), src, html_decode(message), src.voice, src.voice_filter)
 
 /atom/movable/proc/compose_message(atom/movable/speaker, datum/language/message_language, raw_message, radio_freq, list/spans, list/message_mods = list(), face_name = FALSE)
 	//This proc uses text() because it is faster than appending strings. Thanks BYOND.

diff --git a/code/modules/mob/living/living_say.dm b/code/modules/mob/living/living_say.dm
@@ -360,10 +360,17 @@ GLOBAL_LIST_INIT(message_modes_stat_limits, list(
 
 	//speech bubble
 	var/list/speech_bubble_recipients = list()
+	var/found_client = FALSE
 	var/talk_icon_state = say_test(message_raw)
 	for(var/mob/M in listening)
-		if(M.client && (!M.client.prefs.read_preference(/datum/preference/toggle/enable_runechat) || (SSlag_switch.measures[DISABLE_RUNECHAT] && !HAS_TRAIT(src, TRAIT_BYPASS_MEASURES))))
-			speech_bubble_recipients.Add(M.client)
+		if(M.client)
+			if(!M.client.prefs.read_preference(/datum/preference/toggle/enable_runechat) || (SSlag_switch.measures[DISABLE_RUNECHAT] && !HAS_TRAIT(src, TRAIT_BYPASS_MEASURES)))
+				speech_bubble_recipients.Add(M.client)
+			found_client = TRUE
+
+	if(src.voice && found_client)
+		INVOKE_ASYNC(SStts, TYPE_PROC_REF(/datum/controller/subsystem/tts, queue_tts_message), src, html_decode(message_raw), src.voice, src.voice_filter)
+
 
 	var/image/say_popup = image('icons/mob/effects/talk.dmi', src, "[bubble_type][talk_icon_state]", FLY_LAYER)
 	SET_PLANE_EXPLICIT(say_popup, ABOVE_GAME_PLANE, src)

diff --git a/config/config.txt b/config/config.txt
@@ -632,3 +632,6 @@ PR_ANNOUNCEMENTS_PER_ROUND 5
 
 ## Uncomment to block granting profiling privileges to users with R_DEBUG, for performance purposes
 #FORBID_ADMIN_PROFILING
+
+## URL of the HTTP text-to-speech server. Leave commented out to disable TTS. A docker-compose file for running the server can be found in tools/tts
+#TTS_HTTP_URL http://localhost:5002
diff --git a/tgstation.dme b/tgstation.dme
@@ -583,6 +583,7 @@
 #include "code\controllers\subsystem\timer.dm"
 #include "code\controllers\subsystem\title.dm"
 #include "code\controllers\subsystem\traitor.dm"
+#include "code\controllers\subsystem\tts.dm"
 #include "code\controllers\subsystem\verb_manager.dm"
 #include "code\controllers\subsystem\vis_overlays.dm"
 #include "code\controllers\subsystem\vote.dm"

diff --git a/tools/tts/Dockerfile b/tools/tts/Dockerfile
@@ -0,0 +1,11 @@
+# Base image is pinned to a fixed tag so rebuilds stay reproducible
+FROM ghcr.io/coqui-ai/tts-cpu:a31af762e8ad4d8551c67b7f15e80fad1a21ebd2
+# install ffmpeg; the apt package lists are removed in the same layer to keep the image small
+RUN apt-get update \
+    && apt-get install -y ffmpeg \
+    && rm -rf /var/lib/apt/lists/*
+# Python packages for the API server, installed in one layer without keeping pip's download cache
+RUN pip install --no-cache-dir Flask ffmpeg-python waitress
+COPY . /root
+# Directory tts-api.py writes its wav/ogg files to
+RUN mkdir /tts_files
+WORKDIR /root
+
+ENTRYPOINT [ "python3", "tts-api.py" ]
diff --git a/tools/tts/docker-compose.yml b/tools/tts/docker-compose.yml
@@ -0,0 +1,10 @@
+services:
+  tts:
+    container_name: tts
+    build: .
+    ports:
+      # host:container - tts-api.py serves on port 5002
+      - "5002:5002"
+    tty: true
+    volumes:
+      # NOTE(review): presumably keeps the TTS models downloaded into /root/.local across container rebuilds - confirm
+      - "../../data/tts_cache:/root/.local"
+    restart: always
diff --git a/tools/tts/documentation.md b/tools/tts/documentation.md
@@ -0,0 +1,6 @@
+# TTS library
+The TTS server is built on the [Coqui TTS](https://github.com/coqui-ai/TTS) library.
+Documentation for the library can be found [here](https://tts.readthedocs.io/en/latest/).
+
+## Basic documentation
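+To run the TTS server, run `docker-compose up -d` from the `tools/tts` directory, then set `TTS_HTTP_URL` in `config/config.txt` to the server's address (for example `http://localhost:5002`).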
diff --git a/tools/tts/tts-api.py b/tools/tts/tts-api.py
@@ -0,0 +1,37 @@
+import json
+import os
+import shlex
+import subprocess
+
+from flask import Flask, request, send_file
+from TTS.api import TTS
+
+tts = TTS("tts_models/en/vctk/vits", progress_bar=False, gpu=False)
+
+app = Flask(__name__)
+
+@app.route("/tts")
+def text_to_speech():
+    voice = request.args.get("voice", '')
+    text = request.json.get("text", '')
+    identifier = bytes.fromhex(request.args.get("identifier", '')).hex()
+    filter_complex = request.args.get("filter", '')
+    filter_complex = filter_complex.replace("\"", "")
+
+    filter_statement = ""
+    if filter_complex != "":
+        filter_statement = "-filter_complex " + shlex.quote(filter_complex)
+
+    wav_file_loc = f'/tts_files/{identifier}.wav'
+    ogg_file_loc = f'/tts_files/{identifier}.ogg'
+
+    tts.tts_to_file(text=text, speaker=voice, file_path=f"/tts_files/{identifier}.wav")
+    os.system(f"ffmpeg -i {shlex.quote(wav_file_loc)} {filter_statement} -c:a libvorbis -b:a 64k {shlex.quote(ogg_file_loc)} -y")
+    os.remove(wav_file_loc)
+    return send_file(ogg_file_loc, mimetype="audio/wav")
+
+@app.route("/tts-voices")
+def voices_list():
+    return json.dumps(tts.speakers)
+
+if __name__ == "__main__":
+    from waitress import serve
+    serve(app, host="0.0.0.0", port=5002)