Skip to content

Commit

Permalink
translate to english option for whisper
Browse files Browse the repository at this point in the history
  • Loading branch information
mkiol committed Dec 9, 2023
1 parent 74e6c77 commit 2d52f56
Show file tree
Hide file tree
Showing 11 changed files with 84 additions and 12 deletions.
1 change: 1 addition & 0 deletions desktop/dsnote.metainfo.xml
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@
<li>New version of Faster Whisper Large model: 'FasterWhisper Large-v3'</li>
<li>Whisper and Faster Whisper enabled for Chinese-Cantonese language</li>
<li>Support for Speex audio codec in 'Transcribe a file'</li>
<li>Translate to English option for Whisper and Faster Whisper models.</li>
</ul>
<p>Text to Speech:</p>
<ul>
Expand Down
3 changes: 3 additions & 0 deletions desktop/qml/ChangelogPage.qml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ DialogPage {
<li>New version of Faster Whisper Large model: <i>FasterWhisper Large-v3</i></li>
<li>Whisper and Faster Whisper enabled for Chinese-Cantonese language (广东话)</li>
<li>Support for Speex audio codec in <i>Transcribe a file</i></li>
<li>Translate to English option for Whisper and Faster Whisper models.
To automatically translate to English, use the switch to the right of the model selection box.
The option is only visible if you select a non-English Whisper or Faster Whisper model.</li>
</ul>
<p>" + qsTr("Text to Speech") + ":</p>
<ul>
Expand Down
13 changes: 13 additions & 0 deletions desktop/qml/ComboButton.qml
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@ RowLayout {
property alias combo: _combo
property alias combo2: _combo2
property alias combo3: _combo3
property alias check: _check
property alias frame: _frame
property string comboToolTip: ""
property string combo2ToolTip: ""
property string combo3ToolTip: ""
property string checkToolTip: ""
property string buttonToolTip: ""
readonly property bool off: combo.model.length === 0
property string comboPlaceholderText: ""
Expand Down Expand Up @@ -96,6 +98,17 @@ RowLayout {
ToolTip.text: root.combo3ToolTip
}

// Optional on/off switch that follows the combo3 item in this row; exposed to
// users of the component through the `check` alias, with its tooltip text
// taken from `checkToolTip`.
Switch {
id: _check

// Hidden by default; a parent that wants the switch overrides `check.visible`.
visible: false
Layout.alignment: Qt.AlignVCenter | Qt.AlignRight
// Disabled while `root.off` is true, i.e. the first combo's model is empty.
enabled: !root.off
ToolTip.visible: hovered
//ToolTip.delay: Qt.styleHints.mousePressAndHoldInterval
ToolTip.text: root.checkToolTip
}

Button {
id: _button

Expand Down
10 changes: 9 additions & 1 deletion desktop/qml/Notepad.qml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ ColumnLayout {

if (app.stt_configured) {
listenReadCombos.first.combo.currentIndex = app.active_stt_model_idx
listenReadCombos.first.check.checked = _settings.whisper_translate
}
if (app.tts_configured) {
listenReadCombos.second.combo.currentIndex = app.active_tts_model_idx
Expand Down Expand Up @@ -93,11 +94,19 @@ ColumnLayout {
app.state === DsnoteApp.StateListeningManual)
comboToolTip: qsTr("Speech to Text model")
comboPlaceholderText: qsTr("No Speech to Text model")
checkToolTip: qsTr("Translate to English")
combo {
model: app.available_stt_models
onActivated: app.set_active_stt_model_idx(index)
currentIndex: app.active_stt_model_idx
}
check {
visible: app.stt_translate_needed
checked: _settings.whisper_translate
onClicked: {
_settings.whisper_translate = !_settings.whisper_translate
}
}
button {
text: qsTr("Listen")
onClicked: {
Expand Down Expand Up @@ -180,7 +189,6 @@ ColumnLayout {
}
onActivated: _settings.speech_speed = index + 1
}

button {
enabled: listenReadCombos.second.enabled &&
!listenReadCombos.second.off &&
Expand Down
27 changes: 22 additions & 5 deletions src/dsnote_app.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1794,12 +1794,15 @@ void dsnote_app::transcribe_file(const QString &source_file, bool replace) {
int new_task = 0;

if (s->launch_mode() == settings::launch_mode_t::app_stanalone) {
new_task = speech_service::instance()->stt_transcribe_file(source_file,
{}, {});
new_task = speech_service::instance()->stt_transcribe_file(
source_file, {},
s->whisper_translate() ? QStringLiteral("en") : QString{});
} else {
qDebug() << "[app => dbus] call SttTranscribeFile";

new_task = m_dbus_service.SttTranscribeFile(source_file, {}, {});
new_task = m_dbus_service.SttTranscribeFile(
source_file, {},
s->whisper_translate() ? QStringLiteral("en") : QString{});
}

m_side_task.set(new_task);
Expand Down Expand Up @@ -1832,11 +1835,12 @@ void dsnote_app::listen_internal() {
if (s->launch_mode() == settings::launch_mode_t::app_stanalone) {
new_task = speech_service::instance()->stt_start_listen(
static_cast<speech_service::speech_mode_t>(s->speech_mode()), {},
{});
s->whisper_translate() ? QStringLiteral("en") : QString{});
} else {
qDebug() << "[app => dbus] call SttStartListen:" << s->speech_mode();
new_task = m_dbus_service.SttStartListen(
static_cast<int>(s->speech_mode()), {}, {});
static_cast<int>(s->speech_mode()), {},
s->whisper_translate() ? QStringLiteral("en") : QString{});
}

m_primary_task.set(new_task);
Expand Down Expand Up @@ -3157,6 +3161,15 @@ QString dsnote_app::download_content(const QUrl &url) {
return text;
}

// Tells whether the available STT model stored under `id` has the 't' flag
// in the third field of its string-list entry. Returns false when `id` is
// not in m_available_stt_models_map or the entry has fewer than three fields.
// NOTE(review): the third field is presumably the model's options string,
// where 't' marks a translate-capable model - confirm against the code that
// fills the map.
bool dsnote_app::stt_translate_needed_by_id(const QString &id) const {
    const auto entry = m_available_stt_models_map.constFind(id);
    if (entry == m_available_stt_models_map.constEnd()) return false;

    const auto fields = entry.value().toStringList();
    if (fields.size() <= 2) return false;

    return fields.at(2).contains('t');
}

bool dsnote_app::tts_ref_voice_needed_by_id(const QString &id) const {
if (m_available_tts_models_map.contains(id)) {
auto model = m_available_tts_models_map.value(id).toStringList();
Expand All @@ -3166,6 +3179,10 @@ bool dsnote_app::tts_ref_voice_needed_by_id(const QString &id) const {
return false;
}

// Convenience accessor: evaluates stt_translate_needed_by_id() for the
// currently active STT model (m_active_stt_model).
bool dsnote_app::stt_translate_needed() const {
return stt_translate_needed_by_id(m_active_stt_model);
}

// Convenience accessor: evaluates tts_ref_voice_needed_by_id() for the
// currently active TTS model (m_active_tts_model).
bool dsnote_app::tts_ref_voice_needed() const {
return tts_ref_voice_needed_by_id(m_active_tts_model);
}
Expand Down
4 changes: 4 additions & 0 deletions src/dsnote_app.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ class dsnote_app : public QObject {
active_stt_model_changed)
Q_PROPERTY(QVariantList available_stt_models READ available_stt_models
NOTIFY available_stt_models_changed)
Q_PROPERTY(bool stt_translate_needed READ stt_translate_needed NOTIFY
active_stt_model_changed)

// tts ref voices
Q_PROPERTY(int active_tts_ref_voice_idx READ active_tts_ref_voice_idx NOTIFY
Expand Down Expand Up @@ -657,7 +659,9 @@ class dsnote_app : public QObject {
bool feature_text_active_window() const;
bool feature_coqui_tts() const;
void request_reload();
bool stt_translate_needed_by_id(const QString &id) const;
bool tts_ref_voice_needed_by_id(const QString &id) const;
bool stt_translate_needed() const;
bool tts_ref_voice_needed() const;
bool tts_for_in_mnt_ref_voice_needed() const;
bool tts_for_out_mnt_ref_voice_needed() const;
Expand Down
4 changes: 3 additions & 1 deletion src/fasterwhisper_engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,9 @@ void fasterwhisper_engine::decode_speech(const whisper_buf_t& buf) {

auto seg_tuple = m_model->attr("transcribe")(
"audio"_a = array, "beam_size"_a = 5,
"language"_a = m_config.lang);
"language"_a = m_config.lang,
"task"_a = m_config.translate ? "translate"
: "transcribe");

auto segments = *seg_tuple.cast<py::list>().begin();

Expand Down
8 changes: 7 additions & 1 deletion src/models_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1608,6 +1608,7 @@ models_manager::feature_flags models_manager::add_explicit_feature_flags(
existing_features, feature_flags::medium_processing);
existing_features = add_new_feature(existing_features,
feature_flags::medium_quality);
break;
case model_engine_t::tts_piper:
existing_features =
add_new_feature(existing_features, feature_flags::high_quality);
Expand Down Expand Up @@ -1947,10 +1948,15 @@ auto models_manager::extract_models(
}
}

// add char replacement option for all coqui tts models
if (model.engine == model_engine_t::tts_coqui &&
!model.options.contains('c')) {
// add char replacement option for all coqui tts models
model.options.push_back('c');
} else if ((model.engine == model_engine_t::stt_whisper ||
model.engine == model_engine_t::stt_fasterwhisper) &&
!model.options.contains('t') && model.lang_id != "en") {
// add translate option for all non-English whisper and faster-whisper stt models
model.options.push_back('t');
}

models.emplace(model_id, std::move(model));
Expand Down
11 changes: 11 additions & 0 deletions src/settings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1437,6 +1437,17 @@ void settings::set_mnt_clean_text(bool value) {
}
}

// Returns the stored "whisper_translate" setting as a bool; `false` is passed
// as the default to use when the key has no stored value.
bool settings::whisper_translate() const {
return value(QStringLiteral("whisper_translate"), false).toBool();
}

// Stores a new "whisper_translate" value and emits
// whisper_translate_changed(). Does nothing when `value` equals the value
// currently reported by whisper_translate(), so no redundant write or signal
// is produced.
void settings::set_whisper_translate(bool value) {
    if (value == whisper_translate()) return;

    setValue(QStringLiteral("whisper_translate"), value);
    emit whisper_translate_changed();
}

// Returns the stored "gpu_scan_hip" setting as a bool; `true` is passed as
// the default to use when the key has no stored value.
bool settings::gpu_scan_hip() const {
return value(QStringLiteral("gpu_scan_hip"), true).toBool();
}
Expand Down
5 changes: 5 additions & 0 deletions src/settings.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ class settings : public QSettings, public singleton<settings> {
active_tts_for_out_mnt_ref_voice_changed)
Q_PROPERTY(bool mnt_clean_text READ mnt_clean_text WRITE set_mnt_clean_text
NOTIFY mnt_clean_text_changed)
Q_PROPERTY(bool whisper_translate READ whisper_translate WRITE
set_whisper_translate NOTIFY whisper_translate_changed)

// service
Q_PROPERTY(QString models_dir READ models_dir WRITE set_models_dir NOTIFY
Expand Down Expand Up @@ -372,6 +374,8 @@ class settings : public QSettings, public singleton<settings> {
void set_active_tts_for_out_mnt_ref_voice(const QString &value);
bool mnt_clean_text() const;
void set_mnt_clean_text(bool value);
bool whisper_translate() const;
void set_whisper_translate(bool value);

Q_INVOKABLE QUrl app_icon() const;
Q_INVOKABLE bool py_supported() const;
Expand Down Expand Up @@ -505,6 +509,7 @@ class settings : public QSettings, public singleton<settings> {
void active_tts_for_in_mnt_ref_voice_changed();
void active_tts_for_out_mnt_ref_voice_changed();
void mnt_clean_text_changed();
void whisper_translate_changed();

// service
void models_dir_changed();
Expand Down
10 changes: 6 additions & 4 deletions src/speech_service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1023,9 +1023,9 @@ static std::optional<typename Engine::gpu_device_t> make_gpu_device(
return std::nullopt;
}

QString speech_service::restart_stt_engine(
speech_mode_t speech_mode, const QString &model_id,
[[maybe_unused]] const QString &out_lang_id) {
QString speech_service::restart_stt_engine(speech_mode_t speech_mode,
const QString &model_id,
const QString &out_lang_id) {
auto model_config = choose_model_config(engine_t::stt, model_id);
if (model_config && model_config->stt) {
stt_engine::config_t config;
Expand All @@ -1041,7 +1041,8 @@ QString speech_service::restart_stt_engine(
config.lang_code = model_config->stt->lang_code.toStdString();
config.speech_mode =
static_cast<stt_engine::speech_mode_t>(speech_mode);
config.translate = false;
config.translate = !out_lang_id.isEmpty() && out_lang_id == "en" &&
config.lang != "en";
config.options = model_config->options.toStdString();

if (settings::instance()->stt_use_gpu() &&
Expand Down Expand Up @@ -1081,6 +1082,7 @@ QString speech_service::restart_stt_engine(

if (m_stt_engine->model_files() != config.model_files) return true;
if (m_stt_engine->lang() != config.lang) return true;
if (m_stt_engine->translate() != config.translate) return true;
if (config.use_gpu != m_stt_engine->use_gpu() ||
config.gpu_device != m_stt_engine->gpu_device())
return true;
Expand Down

0 comments on commit 2d52f56

Please sign in to comment.