Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Overlap analysis #92

Merged
merged 17 commits into from
Apr 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
83c8cf0
Update buffer size and overlap size in whisper-processing.h and defau…
royshil Apr 19, 2024
f0c33c0
Update buffer size and overlap size in whisper-processing.h and defau…
royshil Apr 19, 2024
a4c84ae
Update suppress_sentences in en-US.ini and transcription-filter-data.h
royshil Apr 22, 2024
632f8d7
Update suppress_sentences and fix whitespace in transcription-filter-…
royshil Apr 22, 2024
fe9a6a1
Update whisper-processing.cpp and whisper-utils.cpp files
royshil Apr 22, 2024
c53432f
Update findStartOfOverlap function signature to use int instead of si…
royshil Apr 22, 2024
d1b0170
Update Whispercpp_Build_GIT_TAG to use commit 7395c70a748753e3800b63e…
royshil Apr 22, 2024
eb9b4d3
Update buffer size and overlap size in whisper-processing.h and defau…
royshil Apr 23, 2024
b9a854c
Update unused parameter in transcription-filter-properties function
royshil Apr 23, 2024
f773c82
Update log level and add suppress_sentences feature in transcription-…
royshil Apr 23, 2024
84c3858
Add translation output feature in en-US.ini and transcription-filter-…
royshil Apr 23, 2024
5da8858
Add DTW token timestamps and buffered output feature
royshil Apr 23, 2024
ceff553
trigger rebuild
royshil Apr 23, 2024
1f01a18
Refactor remove_leading_trailing_nonalpha function to improve readabi…
royshil Apr 25, 2024
08ec4ac
Refactor is_lead_byte and is_trail_byte macros for improved readabili…
royshil Apr 25, 2024
6932b69
Refactor is_lead_byte and is_trail_byte macros for improved readabili…
royshil Apr 25, 2024
13bcb1a
trigger build
royshil Apr 25, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,14 @@ target_sources(
PRIVATE src/plugin-main.c
src/transcription-filter.cpp
src/transcription-filter.c
src/transcription-utils.cpp
src/model-utils/model-downloader.cpp
src/model-utils/model-downloader-ui.cpp
src/model-utils/model-infos.cpp
src/whisper-utils/whisper-processing.cpp
src/whisper-utils/whisper-utils.cpp
src/whisper-utils/silero-vad-onnx.cpp
src/whisper-utils/token-buffer-thread.cpp
src/translation/translation.cpp
src/utils.cpp)

Expand Down
6 changes: 3 additions & 3 deletions cmake/BuildWhispercpp.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,12 @@ elseif(WIN32)
install(FILES ${WHISPER_DLLS} DESTINATION "obs-plugins/64bit")

else()
set(Whispercpp_Build_GIT_TAG "f22d27a385d34b1e544031efe8aa2e3d73922791")
set(Whispercpp_Build_GIT_TAG "7395c70a748753e3800b63e3422a2b558a097c80")
set(WHISPER_EXTRA_CXX_FLAGS "-fPIC")
set(WHISPER_ADDITIONAL_CMAKE_ARGS -DWHISPER_BLAS=OFF -DWHISPER_CUBLAS=OFF -DWHISPER_OPENBLAS=OFF -DWHISPER_NO_AVX=ON
-DWHISPER_NO_AVX2=ON)

# On Linux and MacOS build a static Whisper library
# On Linux build a static Whisper library
ExternalProject_Add(
Whispercpp_Build
DOWNLOAD_EXTRACT_TIMESTAMP true
Expand All @@ -133,7 +133,7 @@ else()

ExternalProject_Get_Property(Whispercpp_Build INSTALL_DIR)

# on Linux and MacOS add the static Whisper library to the link line
# add the static Whisper library to the link line
add_library(Whispercpp::Whisper STATIC IMPORTED)
set_target_properties(
Whispercpp::Whisper
Expand Down
4 changes: 4 additions & 0 deletions data/locale/en-US.ini
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,7 @@ translate_add_context="Translate with context"
whisper_translate="Translate to English (Whisper)"
buffer_size_msec="Buffer size (ms)"
overlap_size_msec="Overlap size (ms)"
suppress_sentences="Suppress sentences (each line)"
translate_output="Translation output"
dtw_token_timestamps="DTW token timestamps"
buffered_output="Buffered output (Experimental)"
118 changes: 0 additions & 118 deletions src/captions-thread.h

This file was deleted.

31 changes: 9 additions & 22 deletions src/transcription-filter-data.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,25 +17,13 @@

#include "translation/translation.h"
#include "whisper-utils/silero-vad-onnx.h"
#include "captions-thread.h"
#include "whisper-utils/whisper-processing.h"
#include "whisper-utils/token-buffer-thread.h"

#define MAX_PREPROC_CHANNELS 10

#define MT_ obs_module_text

// Outcome categories attached to a detection result (see DetectionResultWithText::result).
// NOTE(review): the code that produces these values is not visible here; meanings below
// are read from the enumerator names only — confirm against the producer.
enum DetectionResult {
DETECTION_RESULT_UNKNOWN = 0, // zero value; no determination
DETECTION_RESULT_SILENCE = 1, // presumably: no speech in the analyzed audio
DETECTION_RESULT_SPEECH = 2, // presumably: speech was found in the analyzed audio
};

// Bundles a DetectionResult with a text payload and a start/end time span.
// This is the argument type of transcription_filter_data::setTextCallback.
struct DetectionResultWithText {
DetectionResult result; // detection outcome category
std::string text; // text accompanying the result (presumably the transcribed text — confirm)
uint64_t start_timestamp_ms; // start of the span; milliseconds per the `_ms` suffix (reference clock not shown here)
uint64_t end_timestamp_ms; // end of the span, same unit as start_timestamp_ms
};

struct transcription_filter_data {
obs_source_t *context; // obs filter source (this filter)
size_t channels; // number of channels
Expand Down Expand Up @@ -64,7 +52,7 @@ struct transcription_filter_data {
struct circlebuf input_buffers[MAX_PREPROC_CHANNELS];

/* Resampler */
audio_resampler_t *resampler;
audio_resampler_t *resampler_to_whisper;

/* whisper */
std::string whisper_model_path;
Expand All @@ -90,15 +78,16 @@ struct transcription_filter_data {
bool translate = false;
std::string source_lang;
std::string target_lang;
std::string translation_output;
bool buffered_output = false;
bool enable_token_ts_dtw = false;
std::string suppress_sentences;

// Last transcription result
std::string last_text;

// Text source to output the subtitles
obs_weak_source_t *text_source;
char *text_source_name;
std::mutex *text_source_mutex;
std::string text_source_name;
// Callback to set the text in the output text source (subtitles)
std::function<void(const DetectionResultWithText &result)> setTextCallback;
// Output file path to write the subtitles
Expand All @@ -115,7 +104,7 @@ struct transcription_filter_data {
// translation context
struct translation_context translation_ctx;

CaptionMonitor captions_monitor;
TokenBufferThread captions_monitor;

// ctor
transcription_filter_data()
Expand All @@ -125,11 +114,9 @@ struct transcription_filter_data {
copy_buffers[i] = nullptr;
}
context = nullptr;
resampler = nullptr;
resampler_to_whisper = nullptr;
whisper_model_path = "";
whisper_context = nullptr;
text_source = nullptr;
text_source_mutex = nullptr;
whisper_buf_mutex = nullptr;
whisper_ctx_mutex = nullptr;
wshiper_thread_cv = nullptr;
Expand Down
Loading
Loading