In [72]:
# Import Transformers' logging helper under an alias so it neither shadows nor
# is shadowed by the standard-library `logging` module that a later cell in
# this notebook imports under the same name. Without the alias, re-running
# this cell out of order rebinds `logging` and breaks later `logging.error`
# calls.
from transformers.utils import logging as hf_logging

# Restrict Transformers' own log output to error-level messages.
hf_logging.set_verbosity_error()

In [73]:
from datasets import load_dataset

In [74]:
# Dataset identifier and split are named so they are easy to change in one place.
DATASET_NAME = "librispeech_asr"
DATASET_SPLIT = "train.clean.100"

# streaming=True requests an iterable dataset instead of a full download.
dataset = load_dataset(
    DATASET_NAME,
    split=DATASET_SPLIT,
    streaming=True,
    # NOTE(review): this opts in to running the dataset's own loading script;
    # confirm the source is trusted before re-running.
    trust_remote_code=True,
)

In [75]:
# Take just the first record from the dataset iterator.
dataset_iterator = iter(dataset)
example = next(dataset_iterator)

In [76]:
from transformers import pipeline

In [77]:
# Checkpoint used for speech recognition throughout the notebook.
ASR_MODEL_ID = "distil-whisper/distil-small.en"

# Build the speech-to-text pipeline once; later cells call `asr(...)` directly.
asr = pipeline(
    "automatic-speech-recognition",
    model=ASR_MODEL_ID,
)

In [78]:
import soundfile as sf
import io

In [79]:
# Local audio file used for the manual preprocessing steps below.
AUDIO_PATH = 'harvard.wav'

# sf.read returns the decoded samples together with the file's sampling rate.
audio, sampling_rate = sf.read(AUDIO_PATH)

In [80]:
import numpy as np

# Swap the axes of the decoded audio before the mono conversion.
# NOTE(review): presumably soundfile yields (frames, channels) while librosa
# expects (channels, frames) — confirm for this file.
audio_transposed = np.asanyarray(audio).T

In [81]:
import librosa

In [82]:
# Collapse the (transposed) signal to a single channel.
audio_mono = librosa.to_mono(
    audio_transposed,
)

In [83]:
# Target rate for the resampled signal, in Hz.
TARGET_SAMPLING_RATE = 16000

# Resample the mono signal from the file's native rate to the target rate.
audio_16KHz = librosa.resample(
    audio_mono,
    orig_sr=sampling_rate,
    target_sr=TARGET_SAMPLING_RATE,
)

In [91]:
import gradio as gr
import logging

# Root-logger threshold: only ERROR and above are emitted.
LOG_LEVEL = logging.ERROR
logging.basicConfig(level=LOG_LEVEL)

def transcribe_long_form(filepath):
    """Transcribe an audio file using the notebook-level ``asr`` pipeline.

    Args:
        filepath: Path of the audio file to transcribe. ``None`` or an
            empty/whitespace-only string means no audio was supplied.

    Returns:
        The ``"text"`` entry of the pipeline output on success. On any
        failure, a string beginning with ``"Error: "`` is returned instead of
        raising, so the message can be shown in the output textbox.
    """
    # Treat a missing path and a blank path the same way: there is nothing to
    # hand to the pipeline.
    nothing_supplied = filepath is None or (
        isinstance(filepath, str) and not filepath.strip()
    )
    if nothing_supplied:
        logging.error("No audio file provided.")
        return "Error: No audio file provided."
    try:
        output = asr(filepath, max_new_tokens=256, chunk_length_s=30, batch_size=8)
        return output["text"]
    except Exception as e:
        # logging.exception logs at ERROR level and attaches the traceback,
        # which a plain logging.error(str(e)) would drop.
        logging.exception("Transcription failed: %s", e)
        return f"Error: {e}"

# Interface for audio captured from the microphone. The Audio component is
# configured with type="filepath", matching the `filepath` argument of
# `transcribe_long_form`.
mic_audio_input = gr.Audio(sources="microphone", type="filepath")
mic_text_output = gr.Textbox(label="Transcription", lines=3)

mic_transcribe = gr.Interface(
    fn=transcribe_long_form,
    inputs=mic_audio_input,
    outputs=mic_text_output,
    allow_flagging="never",
)

# Interface for uploaded audio files. It uses the same transcription function
# and output layout as the microphone interface; only the audio source differs.
upload_audio_input = gr.Audio(sources="upload", type="filepath")
upload_text_output = gr.Textbox(label="Transcription", lines=3)

file_transcribe = gr.Interface(
    fn=transcribe_long_form,
    inputs=upload_audio_input,
    outputs=upload_text_output,
    allow_flagging="never",
)

# One tab per interface, with titles in the same order as the interfaces.
tab_interfaces = [mic_transcribe, file_transcribe]
tab_titles = ["Transcribe Microphone", "Transcribe Audio File"]

with gr.Blocks() as demo:
    gr.TabbedInterface(tab_interfaces, tab_titles)

# share=True asks Gradio for a public URL in addition to the local one
# (see the "Running on public URL" line in this cell's output).
launch_options = {"share": True}
demo.launch(**launch_options)

Running on local URL:  http://127.0.0.1:7865
Running on public URL: https://5154c14c171a0b2125.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




Need 'write' access token to create a Spaces repo.

    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 