-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Incorporate GPU optimizations, noise threshold, and window size in fasterWhisper
This commit enhances the performance of the fasterWhisper script by introducing several optimizations. - The code is modified to run on the GPU with FP32 precision, providing faster computation speed. - A noise threshold value is added, set to a high value of 0.9, to handle noisy radio traffic effectively. - A window size of 1536 samples is supported for a sampling_rate of 16000 to capture precise audio information. These improvements enhance transcription accuracy and make the script more efficient. No issues referenced.
- Loading branch information
swiftraccoon
committed
Nov 26, 2023
1 parent
5c2b00e
commit 6143cb3
Showing
2 changed files
with
42 additions
and
22 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,42 +1,54 @@ | ||
#include <stdio.h>

#include <array>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>

// Include necessary headers for Windows
#ifdef _WIN32
#include <stdio.h>
#else
// POSIX headers for other platforms
#include <cstdio>
#endif
|
||
/**
 * @brief Returns a copy of @p str with leading and trailing whitespace removed.
 *
 * Whitespace is any of the characters in " \t\n\r\f\v".
 *
 * @param str Input string; it is not modified.
 * @return The substring spanning the first through the last non-whitespace
 *         character, or an empty string when @p str is empty or consists
 *         only of whitespace.
 */
std::string trim(const std::string &str)
{
    const char *whitespace = " \t\n\r\f\v";

    const std::string::size_type start = str.find_first_not_of(whitespace);
    if (start == std::string::npos)
    {
        // Empty or all-whitespace input: nothing to keep.
        return "";
    }

    // A non-whitespace character exists, so this search cannot return npos.
    const std::string::size_type end = str.find_last_not_of(whitespace);
    return str.substr(start, end - start + 1);
}
|
||
std::string local_transcribe_audio(const std::string& mp3FilePath) { | ||
// Command to execute the Python script | ||
std::string local_transcribe_audio(const std::string &mp3FilePath) | ||
{ | ||
std::string command = "python fasterWhisper.py " + mp3FilePath; | ||
|
||
// Create a pipe to read the output of the executed command | ||
std::array<char, 128> buffer; | ||
std::string result; | ||
|
||
// Use the appropriate popen and pclose functions based on the platform | ||
#ifdef _WIN32 | ||
std::unique_ptr<FILE, decltype(&_pclose)> pipe(_popen(command.c_str(), "r"), _pclose); | ||
#else | ||
std::unique_ptr<FILE, decltype(&pclose)> pipe(popen(command.c_str(), "r"), pclose); | ||
if (!pipe) { | ||
#endif | ||
|
||
if (!pipe) | ||
{ | ||
throw std::runtime_error("popen() failed!"); | ||
} | ||
|
||
// Read the output a line at a time | ||
while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) { | ||
while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) | ||
{ | ||
result += buffer.data(); | ||
} | ||
|
||
// Find the start of the JSON object and return everything from this point | ||
size_t jsonStartPos = result.find('{'); | ||
if (jsonStartPos != std::string::npos) { | ||
std::string jsonResult = result.substr(jsonStartPos); | ||
// Trim whitespace and newline characters | ||
return trim(jsonResult); | ||
if (jsonStartPos != std::string::npos) | ||
{ | ||
return trim(result.substr(jsonStartPos)); | ||
} | ||
|
||
return ""; | ||
return "MUCH_BROKEN_very_wow"; | ||
} |