Skip to content

Commit

Permalink
Bump whisper, clblast, add buffered output (#90)
Browse files Browse the repository at this point in the history
* Bump whisper, clblast, add buffered output

* Update CPU_OR_CUDA environment variable error messages

* Update Cublas validation in Package-Windows.ps1 and initialize function in captions-thread.h

* Update Cublas validation and fix typo in Package-Windows.ps1

* Update default whisper model path to Whisper Tiny English (74Mb)

* Update translation strings for multiple locales
  • Loading branch information
royshil committed Apr 18, 2024
1 parent e5a10f4 commit 65da380
Show file tree
Hide file tree
Showing 27 changed files with 528 additions and 239 deletions.
4 changes: 3 additions & 1 deletion .github/scripts/Package-Windows.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ param(
[string] $Target = 'x64',
[ValidateSet('Debug', 'RelWithDebInfo', 'Release', 'MinSizeRel')]
[string] $Configuration = 'RelWithDebInfo',
[ValidateSet('cpu', '12.2.0', '11.8.0')]
[ValidateSet('cpu', 'clblast', '12.2.0', '11.8.0')]
[string] $Cublas = 'cpu',
[switch] $BuildInstaller,
[switch] $SkipDeps
Expand Down Expand Up @@ -52,6 +52,8 @@ function Package {
# Derive $CudaName from the requested $Cublas variant: 'cpu' and 'clblast'
# are used verbatim; any other value is treated as a CUDA version and gets a
# 'cuda' prefix. The literal must be 'clblast' (matching the ValidateSet on
# the $Cublas parameter), otherwise a clblast build falls through to the
# CUDA branch and is named 'cudaclblast'.
if ( $Cublas -eq 'cpu' ) {
    $CudaName = 'cpu'
} elseif ( $Cublas -eq 'clblast' ) {
    $CudaName = 'clblast'
} else {
    $CudaName = "cuda${Cublas}"
}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build-project.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ jobs:
needs: check-event
strategy:
matrix:
cublas: [cpu, 12.2.0, 11.8.0]
cublas: [cpu, clblast, 12.2.0, 11.8.0]
defaults:
run:
shell: pwsh
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/push.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ jobs:
variants=(
'windows-x64-cpu;zip|exe'
'windows-x64-clblast;zip|exe'
'windows-x64-11.8.0;zip|exe'
'windows-x64-12.2.0;zip|exe'
'macos-arm64;tar.xz|pkg'
Expand Down
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ target_sources(
src/whisper-utils/whisper-processing.cpp
src/whisper-utils/whisper-utils.cpp
src/whisper-utils/silero-vad-onnx.cpp
src/translation/translation.cpp)
src/translation/translation.cpp
src/utils.cpp)

set_target_properties_plugin(${CMAKE_PROJECT_NAME} PROPERTIES OUTPUT_NAME ${_name})
5 changes: 3 additions & 2 deletions cmake/BuildCTranslate2.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,11 @@ elseif(WIN32)

# check CPU_OR_CUDA environment variable
if(NOT DEFINED ENV{CPU_OR_CUDA})
message(FATAL_ERROR "Please set the CPU_OR_CUDA environment variable to either CPU or CUDA")
message(
FATAL_ERROR "Please set the CPU_OR_CUDA environment variable to either `cpu`, `clblast`, `12.2.0` or `11.8.0`")
endif()

if($ENV{CPU_OR_CUDA} STREQUAL "cpu")
if($ENV{CPU_OR_CUDA} STREQUAL "cpu" OR $ENV{CPU_OR_CUDA} STREQUAL "clblast")
FetchContent_Declare(
ctranslate2_fetch
URL https://github.com/occ-ai/obs-ai-ctranslate2-dep/releases/download/1.2.0/libctranslate2-windows-4.1.1-Release-cpu.zip
Expand Down
29 changes: 18 additions & 11 deletions cmake/BuildWhispercpp.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,12 @@ if(APPLE)
endif(NOT DEFINED ENV{MACOS_ARCH})

set(WHISPER_CPP_URL
"https://github.com/occ-ai/occ-ai-dep-whispercpp/releases/download/0.0.1/whispercpp-macos-$ENV{MACOS_ARCH}-0.0.1.tar.gz"
"https://github.com/occ-ai/occ-ai-dep-whispercpp/releases/download/0.0.2/whispercpp-macos-$ENV{MACOS_ARCH}-0.0.2.tar.gz"
)
if($ENV{MACOS_ARCH} STREQUAL "x86_64")
set(WHISPER_CPP_HASH "36F39F02F999AAF157EAD3460DD00C8BDAA3D6C4A769A9E4F64E327871B4B11F")
set(WHISPER_CPP_HASH "00C308AF0BFFF7619934403A8080CC9AFC4EDAA328D7587E617150A2C6A33313")
elseif($ENV{MACOS_ARCH} STREQUAL "arm64")
set(WHISPER_CPP_HASH "6AF7BB904B03B6208B4281D44465B727FB608A32CABD1394B727937C5F4828A1")
set(WHISPER_CPP_HASH "0478E2079E07FA81BEE77506101003F4A4C8F0DF9E23757BD7E1D25DCBD1DB30")
else()
message(
FATAL_ERROR
Expand All @@ -45,24 +45,31 @@ elseif(WIN32)
if(NOT DEFINED ENV{CPU_OR_CUDA})
message(
FATAL_ERROR
"The CPU_OR_CUDA environment variable is not set. Please set it to either `cpu` or `11.8.0` or `12.2.0`")
"The CPU_OR_CUDA environment variable is not set. Please set it to either `cpu`, `clblast` or `11.8.0` or `12.2.0`"
)
endif(NOT DEFINED ENV{CPU_OR_CUDA})

set(CUDA_PREFIX $ENV{CPU_OR_CUDA})
if(NOT $ENV{CPU_OR_CUDA} STREQUAL "cpu")
set(CUDA_PREFIX "cuda$ENV{CPU_OR_CUDA}")
set(ARCH_PREFIX $ENV{CPU_OR_CUDA})
if(NOT $ENV{CPU_OR_CUDA} STREQUAL "cpu" AND NOT $ENV{CPU_OR_CUDA} STREQUAL "clblast")
set(ARCH_PREFIX "cuda$ENV{CPU_OR_CUDA}")
add_compile_definitions("LOCALVOCAL_WITH_CUDA")
elseif($ENV{CPU_OR_CUDA} STREQUAL "cpu")
add_compile_definitions("LOCALVOCAL_WITH_CPU")
else()
add_compile_definitions("LOCALVOCAL_WITH_CLBLAST")
endif()

set(WHISPER_CPP_URL
"https://github.com/occ-ai/occ-ai-dep-whispercpp/releases/download/0.0.1/whispercpp-windows-${CUDA_PREFIX}-0.0.1.zip"
"https://github.com/occ-ai/occ-ai-dep-whispercpp/releases/download/0.0.2/whispercpp-windows-${ARCH_PREFIX}-0.0.2.zip"
)
if($ENV{CPU_OR_CUDA} STREQUAL "cpu")
set(WHISPER_CPP_HASH "5261FCCD18BA52AE7ECD37617452F0514238FAB4B12713F1FCA491F4ABA170AA")
set(WHISPER_CPP_HASH "6DE628A51B9352624A1EC397231591FA3370E6BB42D9364F4F91F11DD18F77D2")
elseif($ENV{CPU_OR_CUDA} STREQUAL "clblast")
set(WHISPER_CPP_HASH "97BF58520F1818B7C9F4E996197F3097934E5E0BBA92B0B016C6B28BE9FF1642")
elseif($ENV{CPU_OR_CUDA} STREQUAL "12.2.0")
set(WHISPER_CPP_HASH "1966A6C7347FCB9529140F8097AED306F31C6DDE328836FD6498A980E20B8E6C")
set(WHISPER_CPP_HASH "48C059A3364E0AAD9FB0D4194BA554865928D22A27ECE5E3C116DC672D5D6EDE")
elseif($ENV{CPU_OR_CUDA} STREQUAL "11.8.0")
set(WHISPER_CPP_HASH "172F4021E888A89A694373AE0888C04DB99BC11F3A2633270248E03AF5AC762E")
set(WHISPER_CPP_HASH "29A5530E83896DE207F0199535CBBB24DF0D63B1373BA66139AD240BA67120EB")
else()
message(
FATAL_ERROR
Expand Down
2 changes: 2 additions & 0 deletions data/locale/ar-SA.ini
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,5 @@ source_language="لغة المصدر"
translate="ترجمة (⚠️ زيادة المعالجة)"
translate_add_context="الترجمة مع السياق"
whisper_translate="ترجمة إلى الإنجليزية (Whisper)"
buffer_size_msec="حجم الذاكرة المؤقتة (ملي ثانية)"
overlap_size_msec="حجم التداخل (ملي ثانية)"
2 changes: 2 additions & 0 deletions data/locale/de-DE.ini
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,5 @@ source_language="Quellsprache"
translate="Übersetzen (⚠️ erhöhte Verarbeitung)"
translate_add_context="Mit Kontext übersetzen"
whisper_translate="Ins Englische übersetzen (Flüstern)"
buffer_size_msec="Puffergröße (ms)"
overlap_size_msec="Überlappungsgröße (ms)"
4 changes: 3 additions & 1 deletion data/locale/en-US.ini
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ text_file_output="Text File output"
output_filename="Output filename"
whisper_model="Whisper Model"
external_model_file="External model file"
whisper_parameters="Advanced Settings"
whisper_parameters="Whisper Model Parameters"
language="Language"
whisper_sampling_method="Whisper Sampling Method"
n_threads="Number of threads"
Expand Down Expand Up @@ -49,3 +49,5 @@ source_language="Source language"
translate="Translate (⚠️ increased processing)"
translate_add_context="Translate with context"
whisper_translate="Translate to English (Whisper)"
buffer_size_msec="Buffer size (ms)"
overlap_size_msec="Overlap size (ms)"
2 changes: 2 additions & 0 deletions data/locale/es-ES.ini
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,5 @@ source_language="Idioma fuente"
translate="Traducir (⚠️ procesamiento aumentado)"
translate_add_context="Traducir con contexto"
whisper_translate="Traducir al inglés (Whisper)"
buffer_size_msec="Tamaño del búfer (ms)"
overlap_size_msec="Tamaño de superposición (ms)"
2 changes: 2 additions & 0 deletions data/locale/fr-FR.ini
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,5 @@ source_language="Langue source"
translate="Traduire (⚠️ traitement accru)"
translate_add_context="Traduire avec contexte"
whisper_translate="Traduire en anglais (Whisper)"
buffer_size_msec="Taille du tampon (ms)"
overlap_size_msec="Taille de chevauchement (ms)"
2 changes: 2 additions & 0 deletions data/locale/hi-IN.ini
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,5 @@ source_language="स्रोत भाषा"
translate="अनुवाद करें (⚠️ बढ़ी प्रसंस्करण)"
translate_add_context="संदर्भ के साथ अनुवाद करें"
whisper_translate="अंग्रेजी में अनुवाद करें (व्हिस्पर)"
buffer_size_msec="बफ़र आकार (ms)"
overlap_size_msec="ओवरलैप आकार (ms)"
2 changes: 2 additions & 0 deletions data/locale/ja-JP.ini
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,5 @@ source_language="ソース言語"
translate="翻訳 (⚠️処理増加)"
translate_add_context="コンテキスト付きで翻訳"
whisper_translate="英語に翻訳(ウィスパー)"
buffer_size_msec="バッファサイズ(ms)"
overlap_size_msec="オーバーラップサイズ(ms)"
2 changes: 2 additions & 0 deletions data/locale/ko-KR.ini
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,5 @@ source_language="원본 언어"
translate="번역 (⚠️ 처리 시간 증가)"
translate_add_context="컨텍스트와 함께 번역"
whisper_translate="영어로 번역 (속삭임)"
buffer_size_msec="버퍼 크기 (ms)"
overlap_size_msec="오버랩 크기 (ms)"
2 changes: 2 additions & 0 deletions data/locale/pl-PL.ini
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,5 @@ source_language="Język źródłowy"
translate="Tłumacz (⚠️ zwiększone przetwarzanie)"
translate_add_context="Tłumacz z kontekstem"
whisper_translate="Tłumacz na angielski (Whisper)"
buffer_size_msec="Rozmiar bufora (ms)"
overlap_size_msec="Rozmiar nakładki (ms)"
2 changes: 2 additions & 0 deletions data/locale/pt-BR.ini
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,5 @@ source_language="Língua de origem"
translate="Traduzir (⚠️ o processamento aumentará)"
translate_add_context="Traduzir com contexto"
whisper_translate="Traduzir para inglês (Whisper)"
buffer_size_msec="Tamanho do buffer (ms)"
overlap_size_msec="Tamanho da sobreposição (ms)"
2 changes: 2 additions & 0 deletions data/locale/ru-RU.ini
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,5 @@ source_language="Исходный язык"
translate="Перевести (⚠️ обработка будет увеличена)"
translate_add_context="Перевести с контекстом"
whisper_translate="Перевести на английский (Whisper)"
buffer_size_msec="Размер буфера (мс)"
overlap_size_msec="Размер перекрытия (мс)"
2 changes: 2 additions & 0 deletions data/locale/zh-CN.ini
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,5 @@ source_language="源语言"
translate="翻译 (⚠️ 增加处理)"
translate_add_context="带上下文翻译"
whisper_translate="翻译为英语(Whisper)"
buffer_size_msec="缓冲区大小(毫秒)"
overlap_size_msec="重叠大小(毫秒)"
118 changes: 118 additions & 0 deletions src/captions-thread.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
#ifndef CAPTIONS_THREAD_H
#define CAPTIONS_THREAD_H

#include <algorithm>
#include <chrono>
#include <condition_variable>
#include <deque>
#include <functional>
#include <mutex>
#include <queue>
#include <string>
#include <thread>
#include <utility>
#include <vector>

#include <obs.h>

#include "plugin-support.h"

// Buffers caption words and re-emits them as a rolling caption line.
//
// Words are appended with addWords(); a worker thread started by initialize()
// wakes up on every addWords() call, joins the first (up to) maxSize queued
// words with spaces and passes the resulting string to the callback. Once the
// queue holds at least maxSize words, or maxTime has elapsed since the last
// flush, the emitted words are dropped from the front of the queue.
//
// The callback is invoked on the worker thread while the internal mutex is
// held, so it must not call addWords() on the same instance.
class CaptionMonitor {
public:
	// default constructor; the worker thread is only started by initialize()
	CaptionMonitor() = default;

	// The worker thread captures `this`, so the object must stay where it is.
	CaptionMonitor(const CaptionMonitor &) = delete;
	CaptionMonitor &operator=(const CaptionMonitor &) = delete;

	// Signals the worker thread to stop and waits for it to finish.
	~CaptionMonitor()
	{
		{
			std::lock_guard<std::mutex> lock(queueMutex);
			stop = true;
		}
		condVar.notify_all();
		// join() on a thread that was never started throws; initialize()
		// may never have been called.
		if (workerThread.joinable()) {
			workerThread.join();
		}
	}

	// Sets the output callback and the flush limits, and starts the worker
	// thread on the first call. Later calls only update the settings.
	//
	// callback_ receives each caption line (words joined by a space, with a
	//           trailing space)
	// maxSize_  maximum number of words per emitted caption line
	// maxTime_  maximum time between two flushes of the queue
	void initialize(std::function<void(const std::string &)> callback_, size_t maxSize_,
			std::chrono::seconds maxTime_)
	{
		{
			// the worker may already be running and reading these fields
			std::lock_guard<std::mutex> lock(queueMutex);
			this->callback = std::move(callback_);
			this->maxSize = maxSize_;
			this->maxTime = maxTime_;
			this->initialized = true;
		}
		// assigning to a joinable std::thread calls std::terminate
		if (!this->workerThread.joinable()) {
			this->workerThread = std::thread(&CaptionMonitor::monitor, this);
		}
	}

	// Appends words to the queue and wakes the worker thread.
	void addWords(const std::vector<std::string> &words)
	{
		{
			std::lock_guard<std::mutex> lock(queueMutex);
			wordQueue.insert(wordQueue.end(), words.begin(), words.end());
			this->newDataAvailable = true;
		}
		condVar.notify_all();
	}

private:
	// Worker thread body: emits a caption for every batch of new words until
	// the destructor sets `stop`.
	void monitor()
	{
		obs_log(LOG_INFO, "CaptionMonitor::monitor");
		auto startTime = std::chrono::steady_clock::now();
		while (true) {
			std::unique_lock<std::mutex> lock(this->queueMutex);
			// wait for new data or stop signal
			this->condVar.wait(lock,
					   [this] { return this->newDataAvailable || this->stop; });

			if (this->stop) {
				break;
			}

			// Consume the notification before any early `continue`;
			// otherwise an empty addWords() call would leave the flag set
			// and this loop would spin without ever blocking again.
			this->newDataAvailable = false;

			if (this->wordQueue.empty()) {
				continue;
			}

			// emit up to maxSize words from the front of the queue, each
			// followed by a space; the words stay queued until flushed
			const size_t emitCount = std::min(this->wordQueue.size(), this->maxSize);
			std::string output;
			for (size_t i = 0; i < emitCount; ++i) {
				output += this->wordQueue[i];
				output += ' ';
			}
			// an empty std::function would throw std::bad_function_call
			if (this->callback) {
				this->callback(output);
			}

			const auto now = std::chrono::steady_clock::now();
			const bool queueFull = this->wordQueue.size() >= this->maxSize;
			const bool timedOut = (now - startTime) >= this->maxTime;
			if (queueFull || timedOut) {
				// flush the words that were just emitted and restart the
				// flush timer
				const auto first = this->wordQueue.begin();
				this->wordQueue.erase(
					first,
					first + static_cast<std::deque<std::string>::difference_type>(
							emitCount));
				startTime = now;
			}
		}
		obs_log(LOG_INFO, "CaptionMonitor::monitor: done");
	}

	std::deque<std::string> wordQueue; // pending words, oldest first
	std::thread workerThread;          // runs monitor()
	std::mutex queueMutex;             // guards every field below and wordQueue
	std::condition_variable condVar;   // signalled by addWords() and the destructor
	std::function<void(const std::string &)> callback;
	size_t maxSize = 0;
	std::chrono::seconds maxTime{0};
	bool stop = false;
	bool initialized = false;
	bool newDataAvailable = false;
};

#endif // CAPTIONS_THREAD_H
16 changes: 8 additions & 8 deletions src/model-utils/model-infos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ std::map<std::string, ModelInfo> models_info = {{
MODEL_TYPE_TRANSCRIPTION,
{{"https://ggml.ggerganov.com/ggml-model-whisper-base-q5_1.bin",
"422F1AE452ADE6F30A004D7E5C6A43195E4433BC370BF23FAC9CC591F01A8898"}}}},
{"Whisper Base En q5 (57Mb)",
{"Whisper Base English q5 (57Mb)",
{"Whisper Base En q5",
"ggml-model-whisper-base-en-q5_1",
MODEL_TYPE_TRANSCRIPTION,
Expand All @@ -41,7 +41,7 @@ std::map<std::string, ModelInfo> models_info = {{
MODEL_TYPE_TRANSCRIPTION,
{{"https://ggml.ggerganov.com/ggml-model-whisper-base.bin",
"60ED5BC3DD14EEA856493D334349B405782DDCAF0028D4B5DF4088345FBA2EFE"}}}},
{"Whisper Base En (141Mb)",
{"Whisper Base English (141Mb)",
{"Whisper Base En",
"ggml-model-whisper-base-en",
MODEL_TYPE_TRANSCRIPTION,
Expand All @@ -59,7 +59,7 @@ std::map<std::string, ModelInfo> models_info = {{
MODEL_TYPE_TRANSCRIPTION,
{{"https://ggml.ggerganov.com/ggml-model-whisper-medium-q5_0.bin",
"19FEA4B380C3A618EC4723C3EEF2EB785FFBA0D0538CF43F8F235E7B3B34220F"}}}},
{"Whisper Medium En q5 (514Mb)",
{"Whisper Medium English q5 (514Mb)",
{"Whisper Medium En q5",
"ggml-model-whisper-medium-en-q5_0",
MODEL_TYPE_TRANSCRIPTION,
Expand All @@ -71,7 +71,7 @@ std::map<std::string, ModelInfo> models_info = {{
MODEL_TYPE_TRANSCRIPTION,
{{"https://ggml.ggerganov.com/ggml-model-whisper-small-q5_1.bin",
"AE85E4A935D7A567BD102FE55AFC16BB595BDB618E11B2FC7591BC08120411BB"}}}},
{"Whisper Small En q5 (181Mb)",
{"Whisper Small English q5 (181Mb)",
{"Whisper Small En q5",
"ggml-model-whisper-small-en-q5_1",
MODEL_TYPE_TRANSCRIPTION,
Expand All @@ -83,7 +83,7 @@ std::map<std::string, ModelInfo> models_info = {{
MODEL_TYPE_TRANSCRIPTION,
{{"https://ggml.ggerganov.com/ggml-model-whisper-small.bin",
"1BE3A9B2063867B937E64E2EC7483364A79917E157FA98C5D94B5C1FFFEA987B"}}}},
{"Whisper Small En (465Mb)",
{"Whisper Small English (465Mb)",
{"Whisper Small En",
"ggml-model-whisper-small-en",
MODEL_TYPE_TRANSCRIPTION,
Expand All @@ -101,19 +101,19 @@ std::map<std::string, ModelInfo> models_info = {{
MODEL_TYPE_TRANSCRIPTION,
{{"https://ggml.ggerganov.com/ggml-model-whisper-tiny-q5_1.bin",
"818710568DA3CA15689E31A743197B520007872FF9576237BDA97BD1B469C3D7"}}}},
{"Whisper Tiny En q5 (31Mb)",
{"Whisper Tiny English q5 (31Mb)",
{"Whisper Tiny En q5",
"ggml-model-whisper-tiny-en-q5_1",
MODEL_TYPE_TRANSCRIPTION,
{{"https://ggml.ggerganov.com/ggml-model-whisper-tiny.en-q5_1.bin",
"C77C5766F1CEF09B6B7D47F21B546CBDDD4157886B3B5D6D4F709E91E66C7C2B"}}}},
{"Whisper Tiny En q8 (42Mb)",
{"Whisper Tiny English q8 (42Mb)",
{"Whisper Tiny En q8",
"ggml-model-whisper-tiny-en-q8_0",
MODEL_TYPE_TRANSCRIPTION,
{{"https://ggml.ggerganov.com/ggml-model-whisper-tiny.en-q8_0.bin",
"5BC2B3860AA151A4C6E7BB095E1FCCE7CF12C7B020CA08DCEC0C6D018BB7DD94"}}}},
{"Whisper Tiny En (74Mb)",
{"Whisper Tiny English (74Mb)",
{"Whisper Tiny En",
"ggml-model-whisper-tiny-en",
MODEL_TYPE_TRANSCRIPTION,
Expand Down
Loading

0 comments on commit 65da380

Please sign in to comment.