Skip to content

Commit

Permalink
Merge remote-tracking branch 'refs/remotes/origin/dev' into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
oobabooga committed Jun 29, 2024
2 parents 8803ae1 + cc825dd commit f62aad3
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 1 deletion.
25 changes: 25 additions & 0 deletions extensions/whisper_stt/script.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Adds a compact "Rec." button right after the Generate button. Clicking it
// forwards the click to another "record-button" element on the page and moves
// a "stop-button" element next to Generate for the duration of the recording.

// Clone the first element carrying the "record-button" class (deep clone) and
// insert the clone immediately after the element with id "Generate".
// NOTE(review): this throws at load time if no "record-button" element or no
// "Generate" element exists yet — confirm the script runs after the UI is built.
var recButton = document.getElementsByClassName("record-button")[0].cloneNode(true);
var generate_button = document.getElementById("Generate");
generate_button.insertAdjacentElement("afterend", recButton);

// Pull the clone 10px to the left and relabel it.
recButton.style.setProperty("margin-left", "-10px");
recButton.innerText = "Rec.";


recButton.addEventListener("click", function() {
// Forward the click to the second "record-button" element in document order.
// Presumably index 0 is the clone inserted above and index 1 is the original
// recorder button — verify against the actual page layout.
var originalRecordButton = document.getElementsByClassName("record-button")[1];
originalRecordButton.click();

// getElementsByClassName returns a live collection, so it reflects the removal
// performed on the next line.
var stopRecordButtons = document.getElementsByClassName("stop-button");
// When more than one stop button exists, drop the first one from Generate's
// parent. Presumably that is a stale button relocated by an earlier click —
// TODO confirm. NOTE(review): removeChild throws if that element is not a
// child of Generate's parent.
if (stopRecordButtons.length > 1) generate_button.parentElement.removeChild(stopRecordButtons[0]);
// Move the (now first) stop button so it sits immediately after Generate.
var stopRecordButton = document.getElementsByClassName("stop-button")[0];
generate_button.insertAdjacentElement("afterend", stopRecordButton);

//stopRecordButton.style.setProperty("margin-left", "-10px");
stopRecordButton.style.setProperty("padding-right", "10px");
// Hide the "Rec." button while the stop button occupies its place.
recButton.style.display = "none";

// Show the "Rec." button again once the stop button is clicked.
// NOTE(review): a new listener is registered on every "Rec." click; if the
// same stop-button element is reused across recordings, listeners accumulate
// (harmless here, since each only resets the display) — confirm.
stopRecordButton.addEventListener("click", function() {
recButton.style.display = "flex";
});
});
20 changes: 19 additions & 1 deletion extensions/whisper_stt/script.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from pathlib import Path

import gradio as gr
import speech_recognition as sr
import numpy as np

from modules import shared

Expand Down Expand Up @@ -45,6 +48,11 @@ def do_stt(audio, whipser_model, whipser_language):
def auto_transcribe(audio, auto_submit, whipser_model, whipser_language):
if audio is None:
return "", ""
sample_rate, audio_data = audio
if not isinstance(audio_data[0], np.ndarray): # workaround for chrome audio. Mono?
# Convert to 2 channels, so each sample s_i consists of the same value in both channels [val_i, val_i]
audio_data = np.column_stack((audio_data, audio_data))
audio = (sample_rate, audio_data)
transcription = do_stt(audio, whipser_model, whipser_language)
if auto_submit:
input_hijack.update({"state": True, "value": [transcription, transcription]})
Expand All @@ -55,7 +63,7 @@ def auto_transcribe(audio, auto_submit, whipser_model, whipser_language):
def ui():
with gr.Accordion("Whisper STT", open=True):
with gr.Row():
audio = gr.Audio(source="microphone")
audio = gr.Audio(source="microphone", type="numpy")
with gr.Row():
with gr.Accordion("Settings", open=False):
auto_submit = gr.Checkbox(label='Submit the transcribed audio automatically', value=params['auto_submit'])
Expand All @@ -69,3 +77,13 @@ def ui():
whipser_model.change(lambda x: params.update({"whipser_model": x}), whipser_model, None)
whipser_language.change(lambda x: params.update({"whipser_language": x}), whipser_language, None)
auto_submit.change(lambda x: params.update({"auto_submit": x}), auto_submit, None)


def custom_js():
    """
    Return the contents of the ``script.js`` file that sits next to this module.

    The file is decoded explicitly as UTF-8 so the result does not depend on
    the platform's default locale encoding.

    :return: the JavaScript source as a string.
    :raises FileNotFoundError: if ``script.js`` does not exist beside this file.
    """
    js_path = Path(__file__).parent.resolve() / "script.js"
    return js_path.read_text(encoding="utf-8")

0 comments on commit f62aad3

Please sign in to comment.