Cell 1: Install the necessary libraries


In [5]:
!pip install transformers gradio soundfile librosa




Cell 2: Import necessary modules and set up logging

In [8]:
# Import necessary modules for the application
import os
import gradio as gr
from transformers import pipeline
from transformers.utils import logging

# Suppress logging warnings and info messages to reduce output clutter
logging.set_verbosity_error()

# Load the pre-trained ASR pipeline (assuming the correct model is used)
asr = pipeline(task="automatic-speech-recognition", model="openai/whisper-small")




Cell 3: Define a function to handle speech transcription

In [10]:
# Define the transcription function for the Gradio app
def transcribe_speech(filepath):
    if filepath is None:
        # Warning if no audio file is provided
        gr.Warning("No audio found, please retry.")
        return ""
    
    # Run automatic speech recognition on the provided file
    output = asr(filepath)
    return output["text"]


Cell 4: Build Gradio interfaces for microphone and file upload transcription

In [14]:
# Build the microphone-based transcription interface
mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs=gr.Textbox(label="Transcription", lines=3),
    allow_flagging="never"
)

# Build the file upload-based transcription interface
file_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(source="upload", type="filepath"),
    outputs=gr.Textbox(label="Transcription", lines=3),
    allow_flagging="never"
)


TypeError: Audio.__init__() got an unexpected keyword argument 'source'

Cell 5: Create a tabbed interface to switch between microphone and file-based transcription

In [17]:
# Create a Gradio Blocks interface to house both the microphone and file-based transcription options
demo = gr.Blocks()

with demo:
    # Set up a tabbed interface for microphone transcription and file upload transcription
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"]
    )


NameError: name 'mic_transcribe' is not defined

Cell 6: Launch the Gradio demo (shareable app)

In [20]:
# Launch the Gradio app, setting the server port dynamically
# The share=True argument allows the app to be shared over the internet
demo.launch(share=True, server_port=int(os.environ.get('PORT1', 7860)))


OSError: Cannot find empty port in range: 7860-7860. You can specify a different port by setting the GRADIO_SERVER_PORT environment variable or passing the `server_port` parameter to `launch()`.

Cell 7: Close the Gradio demo

In [21]:
# Stop the Gradio app to free up resources and avoid conflicts when launching other Gradio apps
demo.close()


Cell 8: Load and preprocess a long audio file

In [25]:
# Import the soundfile library to read and process audio files
import soundfile as sf
import io

# Read the 'narration_example.wav' audio file
audio, sampling_rate = sf.read('narration_example.wav')

# Check the sampling rate of the loaded audio
print("Loaded audio sampling rate:", sampling_rate)

# Check the model's expected sampling rate for ASR
print("ASR model's expected sampling rate:", asr.feature_extractor.sampling_rate)


LibsndfileError: Error opening 'narration_example.wav': System error.

Cell 9: Convert stereo audio to mono using NumPy and librosa


In [28]:
# Import necessary libraries for audio manipulation
import numpy as np
import librosa

# If the audio is stereo (two channels), convert it to mono
if len(audio.shape) > 1:
    audio_mono = librosa.to_mono(audio.T)
    print(f"Converted to mono. New shape: {audio_mono.shape}")
else:
    audio_mono = audio  # If already mono, no need to convert


NameError: name 'audio' is not defined

Cell 10: Resample the audio to match the ASR model's sampling rate

In [31]:
# If the audio's sampling rate doesn't match the ASR model's sampling rate, resample it
asr_sampling_rate = asr.feature_extractor.sampling_rate

if sampling_rate != asr_sampling_rate:
    audio_resampled = librosa.resample(audio_mono, orig_sr=sampling_rate, target_sr=asr_sampling_rate)
    print(f"Resampled audio to {asr_sampling_rate} Hz.")
else:
    audio_resampled = audio_mono


NameError: name 'sampling_rate' is not defined

Cell 11: Perform ASR on the resampled audio


In [34]:
# Perform automatic speech recognition on the resampled audio
transcription = asr(audio_resampled, chunk_length_s=30, batch_size=4, return_timestamps=True)

# Display the ASR output with timestamps
print(transcription["chunks"])


NameError: name 'audio_resampled' is not defined

Cell 12: Build a Gradio interface for long-form audio transcription


In [37]:
# Define a new function to handle long-form transcription
def transcribe_long_form(filepath):
    if filepath is None:
        gr.Warning("No audio found, please retry.")
        return ""
    
    # Process long audio files with chunking and batching for better performance
    output = asr(filepath, max_new_tokens=256, chunk_length_s=30, batch_size=8)
    return output["text"]

# Build the Gradio interface for microphone input
mic_transcribe = gr.Interface(
    fn=transcribe_long_form,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs=gr.Textbox(label="Transcription", lines=3),
    allow_flagging="never"
)

# Build the Gradio interface for file upload input
file_transcribe = gr.Interface(
    fn=transcribe_long_form,
    inputs=gr.Audio(source="upload", type="filepath"),
    outputs=gr.Textbox(label="Transcription", lines=3),
    allow_flagging="never"
)


TypeError: Audio.__init__() got an unexpected keyword argument 'source'

Cell 13: Set up the tabbed interface for long-form audio transcription

In [40]:
# Set up the Gradio Blocks interface for long-form audio transcription
demo = gr.Blocks()

with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"]
    )

# Launch the demo with the ability to share it online
demo.launch(share=True, server_port=int(os.environ.get('PORT1', 7860)))


NameError: name 'mic_transcribe' is not defined

Cell 14: Close the Gradio app to avoid conflicts

In [43]:
# Stop the Gradio demo to free up resources for future apps
demo.close()
