Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds TTS to announcements made by the Captain and silicons #72300

Closed
wants to merge 46 commits into from
Closed
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
6f4c8c1
done-tts-stuff
Dec 27, 2022
b97140b
Adds TTS
Dec 27, 2022
51ae564
adds documentation + moves things over to config
Dec 27, 2022
a86a887
sanitize
Dec 28, 2022
69a2139
Changes
Dec 28, 2022
44cca21
Progress
Dec 28, 2022
9972a32
Config tweaks
Dec 28, 2022
52b3d77
Removes ED
Dec 28, 2022
0e1fd99
Fixes configs again
Dec 28, 2022
77bc0c2
Bugfixes
Dec 28, 2022
b41040c
Adds caching
Dec 28, 2022
c72795a
sort-of fixes stuttering
Dec 28, 2022
6d5290e
Changes how voices are cached
Dec 28, 2022
7a75ebd
performance fixes
Dec 28, 2022
0830e0a
Moves tts to be handled in preferences, also lowers the volume a bit
Dec 28, 2022
a0668c6
Adds TTS voice preferences, applies tts filters to borgs, makes langu…
Dec 29, 2022
d293333
Adds a voice to a mob if a client logs into it
Dec 29, 2022
37d8fad
Fixes CI
Dec 29, 2022
f74d7f8
Uses torch.no_grad
Dec 30, 2022
6c9675a
Intel accelerated python. Fileless python ffmpeg handling. misc other…
MrStonedOne Dec 31, 2022
8b6f4d1
Changes the python library.
Dec 31, 2022
706060a
Implements priority queue
Dec 31, 2022
e772672
Updates python serve function
Dec 31, 2022
77d0452
Correctly removes cached voices
Jan 1, 2023
0e5b692
Further cache fixes
Jan 1, 2023
7afb190
Removes warning
Jan 1, 2023
7b56093
Sets max concurrent requests to 5
Jan 1, 2023
9811206
Apply suggestions from code review
Watermelon914 Jan 2, 2023
dc786ec
Changes
Jan 2, 2023
f83364f
Fixes config
Jan 2, 2023
90027e8
[WIP] HTML audio subsystem (#11)
Iamgoofball Jan 3, 2023
63b6854
Bugfixes
Jan 3, 2023
b8fc282
Merge branch 'tts' of github.com:Watermelon914/tgstation into tts
Jan 3, 2023
d16026c
Fixes
Jan 3, 2023
d8308aa
Adds punctuation
Jan 4, 2023
ae29825
Updates the documentation
Jan 4, 2023
1652e07
Removes goof's radios
Jan 4, 2023
bdd25a6
Linter fixes
Jan 4, 2023
715752a
Tts (#13)
Iamgoofball Jan 12, 2023
a041c62
Removes vits library
Mar 12, 2023
04b8276
Reverts back to coqui library, reworks so that TTS is only applied to…
Mar 12, 2023
2217d11
Fixes conflicts
Mar 12, 2023
0c2e87c
TTS disabled by default
Mar 12, 2023
07b7f2c
Fixes TTS being off by default
Mar 12, 2023
9bd4b87
Triples the word limit on TTS
Mar 12, 2023
c363b7a
Pushes timeout a little bit back
Mar 12, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions code/__DEFINES/subsystems.dm
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@
#define INIT_ORDER_ECONOMY 40
#define INIT_ORDER_OUTPUTS 35
#define INIT_ORDER_RESTAURANT 34
#define INIT_ORDER_TTS 31 // Needs to be before SSAtoms init
#define INIT_ORDER_ATOMS 30
#define INIT_ORDER_LANGUAGE 25
#define INIT_ORDER_MACHINES 20
Expand Down
2 changes: 2 additions & 0 deletions code/controllers/configuration/entries/game_options.dm
Original file line number Diff line number Diff line change
Expand Up @@ -412,3 +412,5 @@

/datum/config_entry/flag/disallow_circuit_sounds

/// Base URL of the text-to-speech HTTP server. The TTS subsystem skips initialization when this is unset.
/datum/config_entry/string/tts_http_url
protection = CONFIG_ENTRY_LOCKED
151 changes: 151 additions & 0 deletions code/controllers/subsystem/tts.dm
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
/*!
* Copyright (c) 2020 Aleksej Komarov
* SPDX-License-Identifier: MIT
*/

/// Returns `text` with every character that is not an ASCII letter, digit, space or one of , ? . ! ' & replaced by a space.
/proc/tts_filter(text)
// Only letters, digits, spaces and basic punctuation (, ? . ! ' &) are kept; everything else becomes a space
var/static/regex/bad_chars_regex = regex("\[^a-zA-Z0-9 ,?.!'&]", "g")
return bad_chars_regex.Replace(text, " ")


// Indices into a queued message entry, which queue_tts_message builds as
// list(target, identifier, timeout, request)
#define TARGET_INDEX 1
#define IDENTIFIER_INDEX 2
#define TIMEOUT_INDEX 3
#define REQUEST_INDEX 4

// Subsystem that sends text-to-speech requests to an external HTTP server and plays back the results
SUBSYSTEM_DEF(tts)
name = "Text To Speech"
wait = 0.1 SECONDS
init_order = INIT_ORDER_TTS

/// Queued HTTP requests that have yet to be sent
var/list/queued_tts_messages = list()

/// Queued HTTP requests that have yet to be sent. Takes priority over queued_tts_messages
var/list/priority_queued_tts_messages = list()

/// HTTP requests currently in progress but not being processed yet
var/list/in_process_tts_messages = list()

/// HTTP requests that are being processed to see if they've been finished
var/list/current_processing_tts_messages = list()

/// A list of available speakers
var/list/available_speakers = list()

/// Whether TTS is enabled or not
var/tts_enabled = FALSE

/// Added to world.time when a message is queued; fire() drops an unfinished request once that time has passed
var/message_timeout = 5 SECONDS

/// Maximum number of entries kept in in_process_tts_messages before new requests are queued instead of started
var/max_processing_at_once = 15

/datum/controller/subsystem/tts/vv_edit_var(var_name, var_value)
	// Every var except tts_enabled goes through the normal edit path
	if(var_name != NAMEOF(src, tts_enabled))
		return ..()
	// tts_enabled is only set once Initialize() has reached the TTS server, so refuse manual edits
	return FALSE

/datum/controller/subsystem/tts/Initialize()
	var/tts_url = CONFIG_GET(string/tts_http_url)
	if(!tts_url)
		return SS_INIT_NO_NEED

	// Ask the TTS server which voices it offers and wait for the answer
	var/datum/http_request/voices_request = new()
	voices_request.prepare(RUSTG_HTTP_METHOD_GET, "[tts_url]/tts-voices", "", "")
	voices_request.begin_async()
	UNTIL(voices_request.is_complete())

	var/datum/http_response/voices_response = voices_request.into_response()
	if(voices_response.errored || voices_response.status_code != 200)
		return SS_INIT_NO_NEED

	available_speakers = json_decode(voices_response.body)
	available_speakers -= "ED\n" // TODO: properly fix this
	// Only a successful voice listing turns TTS on
	tts_enabled = TRUE
	return SS_INIT_SUCCESS

/**
 * Starts queued TTS requests and plays back the ones that have finished.
 *
 * When not resumed, queued requests are started until max_processing_at_once are in flight
 * (priority queue first) and the in-flight list is copied for processing.
 * Each copied entry is then checked: entries whose target is deleted or whose timeout has
 * passed are dropped, unfinished ones are left for a later run, and finished ones are played
 * from their target if the server answered successfully.
 *
 * Arguments:
 * * resumed - when FALSE, new requests are started and the processing list is rebuilt
 */
/datum/controller/subsystem/tts/fire(resumed)
	if(!tts_enabled)
		flags |= SS_NO_FIRE
		return

	if(!resumed)
		// Entries are taken from the end of each queue, priority queue first
		var/list/priority_list = priority_queued_tts_messages
		while(length(in_process_tts_messages) < max_processing_at_once && priority_list.len > 0)
			var/list/entry = priority_list[priority_list.len]
			priority_list.len--
			var/datum/http_request/request = entry[REQUEST_INDEX]
			request.begin_async()
			in_process_tts_messages += list(entry)
		var/list/less_priority_list = queued_tts_messages
		while(length(in_process_tts_messages) < max_processing_at_once && less_priority_list.len > 0)
			var/list/entry = less_priority_list[less_priority_list.len]
			less_priority_list.len--
			var/datum/http_request/request = entry[REQUEST_INDEX]
			request.begin_async()
			in_process_tts_messages += list(entry)
		current_processing_tts_messages = in_process_tts_messages.Copy()

	// For speed
	var/list/processing_messages = current_processing_tts_messages
	while(processing_messages.len)
		var/list/current_message = processing_messages[processing_messages.len]
		processing_messages.len--
		var/atom/movable/target = current_message[TARGET_INDEX]
		if(QDELETED(target))
			in_process_tts_messages -= list(current_message)
			continue

		var/datum/http_request/request = current_message[REQUEST_INDEX]
		if(!request.is_complete())
			// Still waiting on the server: give up once the timeout has passed, otherwise check again later
			if(current_message[TIMEOUT_INDEX] < world.time)
				in_process_tts_messages -= list(current_message)
			continue

		var/datum/http_response/response = request.into_response()
		in_process_tts_messages -= list(current_message)
		var/sound_path = "tmp/[current_message[IDENTIFIER_INDEX]].ogg"
		// Same success check as Initialize(): a non-200 response body is not playable audio
		if(!response.errored && response.status_code == 200)
			var/sound/new_sound = new(sound_path)
			playsound(target, new_sound, 100, ignore_walls = FALSE)
		// The request is finished either way, so never leave its output file behind
		fdel(file(sound_path))
		if(MC_TICK_CHECK)
			return

/**
 * Builds a TTS request for a spoken message and either starts it or queues it.
 *
 * Does nothing when TTS is disabled or the message contains no letter or digit.
 * The message is passed through tts_filter() and truncated before being sent.
 * The request is started right away while fewer than max_processing_at_once requests are in
 * flight; otherwise it is queued, and targets that are mobs with a client use the priority queue.
 *
 * Arguments:
 * * target - stored as the first element of the queued entry; mobs with a client get priority
 * * message - the text to synthesize
 * * speaker - voice name, must be in available_speakers or the proc crashes
 * * filter - audio filter string forwarded url-encoded to the server
 */
/datum/controller/subsystem/tts/proc/queue_tts_message(target, message, speaker, filter)
	if(!tts_enabled)
		return

	// No "g" flag: this is a plain containment test, and a static global pattern would
	// carry its search position over between calls
	var/static/regex/contains_alphanumeric = regex("\[a-zA-Z0-9]")
	// If there is no alphanumeric char, the output will usually be static, so
	// don't bother sending
	if(contains_alphanumeric.Find(message) == 0)
		return

	var/shell_scrubbed_input = tts_filter(message)
	shell_scrubbed_input = copytext(shell_scrubbed_input, 1, 100)
	var/identifier = md5(speaker + shell_scrubbed_input + filter)
	if(!(speaker in available_speakers))
		CRASH("Tried to use invalid speaker for TTS message! ([speaker])")
	speaker = tts_filter(speaker)

	var/list/headers = list()
	headers["Content-Type"] = "application/json"
	var/datum/http_request/request = new()
	// The response body is written to this file
	var/file_name = "tmp/[identifier].ogg"
	// tts_filter() still lets spaces, & and ? through, so the voice is url-encoded like the filter
	request.prepare(RUSTG_HTTP_METHOD_GET, "[CONFIG_GET(string/tts_http_url)]/tts?voice=[url_encode(speaker)]&identifier=[identifier]&filter=[url_encode(filter)]", json_encode(list("text" = shell_scrubbed_input)), headers, file_name)
	var/list/waiting_list = queued_tts_messages
	if(length(in_process_tts_messages) < max_processing_at_once)
		request.begin_async()
		waiting_list = in_process_tts_messages
	else if(ismob(target))
		var/mob/target_mob = target
		if(target_mob.client != null)
			waiting_list = priority_queued_tts_messages

	// Entry layout must match the *_INDEX defines: target, identifier, timeout, request
	waiting_list += list(list(target, identifier, world.time + message_timeout, request))

#undef TARGET_INDEX
#undef IDENTIFIER_INDEX
#undef TIMEOUT_INDEX
#undef REQUEST_INDEX
9 changes: 9 additions & 0 deletions code/game/atoms_movable.dm
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@
/// The degree of pressure protection that mobs in list/contents have from the external environment, between 0 and 1
var/contents_pressure_protection = 0

/// The voice that this movable makes when speaking
var/voice

/// Filter string passed along with this movable's voice when a TTS message is queued
var/voice_filter = ""

/mutable_appearance/emissive_blocker

/mutable_appearance/emissive_blocker/New()
Expand All @@ -104,6 +109,10 @@

/atom/movable/Initialize(mapload)
. = ..()

if(!voice && SStts.tts_enabled)
voice = pick(SStts.available_speakers)

switch(blocks_emissive)
if(EMISSIVE_BLOCK_GENERIC)
var/static/mutable_appearance/emissive_blocker/blocker = new()
Expand Down
5 changes: 5 additions & 0 deletions code/game/say.dm
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,16 @@ GLOBAL_LIST_INIT(freqtospan, list(
return TRUE

/// Calls Hear() on every hearer returned by get_hearers_in_view(range, source), then queues a TTS message if this movable has a voice and at least one hearer is in GLOB.player_list.
/atom/movable/proc/send_speech(message, range = 7, obj/source = src, bubble_type, list/spans, datum/language/message_language, list/message_mods = list(), forced = FALSE)
// Set once any hearer turns out to be in GLOB.player_list
var/found_client = FALSE
for(var/atom/movable/hearing_movable as anything in get_hearers_in_view(range, source))
if(!hearing_movable)//theoretically this should use as anything because it shouldnt be able to get nulls but there are reports that it does.
stack_trace("somehow theres a null returned from get_hearers_in_view() in send_speech!")
continue
hearing_movable.Hear(null, src, message_language, message, null, spans, message_mods, range)
// The list lookup is skipped once a player has been found
if(!found_client && (hearing_movable in GLOB.player_list))
found_client = TRUE
// TTS is only requested when there is a voice to use and a player heard the message
if(src.voice && found_client)
INVOKE_ASYNC(SStts, TYPE_PROC_REF(/datum/controller/subsystem/tts, queue_tts_message), src, html_decode(message), src.voice, src.voice_filter)

/atom/movable/proc/compose_message(atom/movable/speaker, datum/language/message_language, raw_message, radio_freq, list/spans, list/message_mods = list(), face_name = FALSE)
//This proc uses text() because it is faster than appending strings. Thanks BYOND.
Expand Down
11 changes: 9 additions & 2 deletions code/modules/mob/living/living_say.dm
Original file line number Diff line number Diff line change
Expand Up @@ -360,10 +360,17 @@ GLOBAL_LIST_INIT(message_modes_stat_limits, list(

//speech bubble
var/list/speech_bubble_recipients = list()
var/found_client = FALSE
var/talk_icon_state = say_test(message_raw)
for(var/mob/M in listening)
if(M.client && (!M.client.prefs.read_preference(/datum/preference/toggle/enable_runechat) || (SSlag_switch.measures[DISABLE_RUNECHAT] && !HAS_TRAIT(src, TRAIT_BYPASS_MEASURES))))
speech_bubble_recipients.Add(M.client)
if(M.client)
if(!M.client.prefs.read_preference(/datum/preference/toggle/enable_runechat) || (SSlag_switch.measures[DISABLE_RUNECHAT] && !HAS_TRAIT(src, TRAIT_BYPASS_MEASURES)))
speech_bubble_recipients.Add(M.client)
found_client = TRUE

if(src.voice && found_client)
INVOKE_ASYNC(SStts, TYPE_PROC_REF(/datum/controller/subsystem/tts, queue_tts_message), src, html_decode(message_raw), src.voice, src.voice_filter)


var/image/say_popup = image('icons/mob/effects/talk.dmi', src, "[bubble_type][talk_icon_state]", FLY_LAYER)
SET_PLANE_EXPLICIT(say_popup, ABOVE_GAME_PLANE, src)
Expand Down
3 changes: 3 additions & 0 deletions config/config.txt
Original file line number Diff line number Diff line change
Expand Up @@ -632,3 +632,6 @@ PR_ANNOUNCEMENTS_PER_ROUND 5

## Uncomment to block granting profiling privileges to users with R_DEBUG, for performance purposes
#FORBID_ADMIN_PROFILING

## URL of the HTTP server that provides text-to-speech. A docker-compose file for running one can be found in tools/tts
#TTS_HTTP_URL http://localhost:5002
1 change: 1 addition & 0 deletions tgstation.dme
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,7 @@
#include "code\controllers\subsystem\timer.dm"
#include "code\controllers\subsystem\title.dm"
#include "code\controllers\subsystem\traitor.dm"
#include "code\controllers\subsystem\tts.dm"
#include "code\controllers\subsystem\verb_manager.dm"
#include "code\controllers\subsystem\vis_overlays.dm"
#include "code\controllers\subsystem\vote.dm"
Expand Down
11 changes: 11 additions & 0 deletions tools/tts/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
FROM ghcr.io/coqui-ai/tts-cpu:a31af762e8ad4d8551c67b7f15e80fad1a21ebd2

# ffmpeg converts the generated wav files to ogg.
# The apt package lists are removed in the same layer so they do not stay in the image.
RUN apt-get update \
    && apt-get install -y ffmpeg \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies of the API server, installed in one layer without keeping pip's cache
RUN pip install --no-cache-dir Flask ffmpeg-python waitress

COPY . /root
# Directory tts-api.py writes its wav/ogg files to
RUN mkdir /tts_files
WORKDIR /root

ENTRYPOINT [ "python3", "tts-api.py" ]
10 changes: 10 additions & 0 deletions tools/tts/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
services:
  tts:
    container_name: tts
    build: .
    ports:
      # Port tts-api.py listens on
      - "5002:5002"
    tty: true
    volumes:
      - "../../data/tts_cache:/root/.local"
    restart: always
6 changes: 6 additions & 0 deletions tools/tts/documentation.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# TTS library
Can be found [here](https://github.com/coqui-ai/TTS)
Documentation for the library can be found [here](https://tts.readthedocs.io/en/latest/)

## Basic documentation
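To run the TTS server, run `docker-compose up -d` from the `tools/tts` directory, then set `TTS_HTTP_URL` in the game config to the server's address (for example `http://localhost:5002`).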
37 changes: 37 additions & 0 deletions tools/tts/tts-api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from flask import Flask, request, send_file
from TTS.api import TTS
import os
import json
import shlex

# Load the model once at import time (gpu=False); both routes below use this instance.
tts = TTS("tts_models/en/vctk/vits", progress_bar=False, gpu=False)

# Flask application the routes below are registered on.
app = Flask(__name__)

@app.route("/tts")
def text_to_speech():
    """Synthesize the requested text and return it as an Ogg Vorbis file.

    Query parameters:
        voice: speaker name passed to the TTS model.
        identifier: hex string used to name the files under /tts_files.
        filter: optional ffmpeg ``-filter_complex`` expression.

    The JSON request body carries the text to speak under the ``text`` key.

    Raises:
        ValueError: if ``identifier`` is not valid hex.
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    import subprocess

    voice = request.args.get("voice", "")
    text = request.json.get("text", "")
    # Round-tripping through bytes.fromhex() rejects anything that is not hex,
    # so the identifier is safe to embed in a file path.
    identifier = bytes.fromhex(request.args.get("identifier", "")).hex()
    filter_complex = request.args.get("filter", "").replace('"', "")

    wav_file_loc = f"/tts_files/{identifier}.wav"
    ogg_file_loc = f"/tts_files/{identifier}.ogg"

    # Arguments are passed as a list so no shell ever parses request data.
    command = ["ffmpeg", "-i", wav_file_loc]
    if filter_complex:
        command += ["-filter_complex", filter_complex]
    command += ["-c:a", "libvorbis", "-b:a", "64k", ogg_file_loc, "-y"]

    tts.tts_to_file(text=text, speaker=voice, file_path=wav_file_loc)
    try:
        # check=True surfaces a failed conversion instead of serving a
        # missing or stale ogg file.
        subprocess.run(command, check=True)
    finally:
        # The intermediate wav is never needed again, even if ffmpeg failed.
        os.remove(wav_file_loc)
    # The file is Ogg Vorbis, so advertise it as such (was "audio/wav").
    return send_file(ogg_file_loc, mimetype="audio/ogg")

@app.route("/tts-voices")
def voices_list():
    """Return the loaded model's speaker names as a JSON-encoded string."""
    speakers = tts.speakers
    return json.dumps(speakers)

if __name__ == "__main__":
    # Serve the Flask app with waitress on all interfaces, port 5002.
    import waitress

    waitress.serve(app, host="0.0.0.0", port=5002)