In [35]:
# Silence Python warnings up front so the imports below do not flood the notebook output.
import warnings
warnings.filterwarnings('ignore')
# NOTE: `logging` here is transformers' logging utility, not the standard-library module.
from transformers.utils import logging
from transformers import pipeline
# Limit transformers' own log output to errors only.
logging.set_verbosity_error()
import gradio as gr
# Container for the speech-recognition tabs; it is filled in later inside a `with asr:` block.
asr = gr.Blocks()

# Build the automatic-speech-recognition pipeline from the
# 'distil-whisper/distil-small.en' model.
speech_recognizer = pipeline(
    task='automatic-speech-recognition',
    model='distil-whisper/distil-small.en',
)

def transcribe(filepath):
    """Transcribe an audio file and return the recognized text.

    Args:
        filepath: Path of the audio file to transcribe, or ``None`` when no
            audio was provided.

    Returns:
        The ``'text'`` entry of the recognizer output, or an empty string
        (after raising a Gradio warning) when ``filepath`` is ``None``.
    """
    if filepath is not None:
        # batch_size is kept at 1; raise it if more compute is available.
        recognition = speech_recognizer(
            filepath,
            max_new_tokens=256,
            chunk_length_s=30,
            batch_size=1,
        )
        return recognition['text']

    # Nothing was recorded or uploaded: tell the user and return no text.
    gr.Warning('No audio found, please retry.')
    return ''

def _build_transcribe_interface(source):
    """Create a transcription interface whose audio input comes from `source`.

    Args:
        source: Value forwarded to ``gr.Audio(sources=...)``; the notebook
            uses "microphone" and "upload".

    Returns:
        A ``gr.Interface`` that passes the audio file path to ``transcribe``
        and shows the result in a three-line "Transcription" textbox, with
        flagging disabled.
    """
    return gr.Interface(
        fn=transcribe,
        inputs=gr.Audio(sources=source, type="filepath"),
        outputs=gr.Textbox(label="Transcription", lines=3),
        allow_flagging="never",
    )


# The two interfaces differ only in where the audio comes from, so both are
# built by the same helper instead of two copy-pasted definitions.
mic_transcribe = _build_transcribe_interface("microphone")
file_transcribe = _build_transcribe_interface("upload")

# Fill the `asr` container with one tab per audio source.
with asr:
    gr.TabbedInterface(
        interface_list=[mic_transcribe, file_transcribe],
        tab_names=["Transcribe Microphone", "Transcribe Audio File"],
    )



# Centered HTML banner for the speech-recognition page: title, model name, author.
# Created outside any Blocks context; it is placed on a page later via .render().
markdown_content_asr = gr.Markdown(
    value="""
    <div style='text-align: center; font-family: "Times New Roman";'>
        <h1 style='color: #FF6347;'>Automatic Speech Recognition</h1>
        <h3 style='color: #4682B4;'>Model: distil-whisper/distil-small.en</h3>
        <h3 style='color: #32CD32;'>Made By: Md. Mahmudun Nabi</h3>
    </div>
    """
)

# Page layout for speech recognition: the banner first, then the tabbed demo.
with gr.Blocks() as asr_with_markdown:
    markdown_content_asr.render()
    asr.render()

In [31]:
import numpy as np
from phonemizer.backend.espeak.wrapper import EspeakWrapper

# Tell phonemizer which eSpeak NG shared library to use.
# The path is a raw string: in a normal literal "\P", "\e" and "\l" are
# invalid escape sequences, which newer Python versions warn about.
# NOTE(review): this is a machine-specific Windows install path; adjust it
# when running the notebook elsewhere.
EspeakWrapper.set_library(r'C:\Program Files\eSpeak NG\libespeak-ng.dll')

# Get the text-to-speech model
narrator = pipeline(task='text-to-speech',
                    model='kakao-enterprise/vits-ljs')

def narrate_text(text):
    """Synthesize speech for `text` with the `narrator` pipeline.

    Args:
        text: The text to read aloud.

    Returns:
        A ``(sampling_rate, audio)`` tuple taken from the pipeline output,
        or ``None`` (after raising a Gradio warning) when `text` is ``None``,
        empty, or whitespace-only.
    """
    # Mirror transcribe(): warn and return nothing instead of sending empty
    # input to the model.
    if text is None or (isinstance(text, str) and not text.strip()):
        gr.Warning('No text found, please retry.')
        return None

    narration = narrator(text)
    # Keep only the first entry along the leading axis of the audio array;
    # presumably that is the single waveform for this one input — TODO confirm
    # against the pipeline's output shape.
    waveform = narration['audio'][0]
    # No conversion is performed: the values are returned as produced.
    return narration['sampling_rate'], waveform

# Text-to-speech UI: a multi-line text box in, an audio component out.
text_to_speech_interface = gr.Interface(
    fn=narrate_text,
    inputs=gr.Textbox(
        label="Input Text",
        placeholder="Enter text here...",
        lines=5,
    ),
    outputs='audio',
    allow_flagging='never',
)

# Centered HTML banner for the text-to-speech page: title, model name, author.
# Created outside any Blocks context; it is placed on a page later via .render().
markdown_content_text2speech = gr.Markdown(
    value="""
    <div style='text-align: center; font-family: "Times New Roman";'>
        <h1 style='color: #FF6347;'>Text to Speech</h1>
        <h3 style='color: #4682B4;'>Model: kakao-enterprise/vits-ljs</h3>
        <h3 style='color: #32CD32;'>Made By: Md. Mahmudun Nabi</h3>
    </div>
    """
)

# Page layout for text to speech: the banner first, then the demo interface.
with gr.Blocks() as text2speech_with_markdown:
    markdown_content_text2speech.render()
    text_to_speech_interface.render()

In [39]:
# Top-level app: one tab per demo page.
app = gr.Blocks()
with app:
    gr.TabbedInterface(
        [asr_with_markdown,
         text2speech_with_markdown],
        # Tab labels, in the same order as the pages above.
        ["Automatic Speech Recognition",
         "Text to Speech"],
    )

In [40]:
# Start the Gradio server for the combined app; the cell output reports the local URL.
app.launch()

Running on local URL:  http://127.0.0.1:7869

To create a public link, set `share=True` in `launch()`.




In [42]:
# Shut down the server started by app.launch().
app.close()

Closing server running on port: 7869
