In [1]:
pip install --upgrade gradio


Collecting gradio
  Downloading gradio-5.16.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.8-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.7.0 (from gradio)
  Downloading gradio_client-1.7.0-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.9.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.meta

In [None]:
# Install the Python packages used by this notebook.
#
# pip has no supported in-process API (`pip.main` was removed in pip 10), so
# pip is run as a subprocess of the interpreter that backs this kernel. That
# also guarantees the packages land in the kernel's own environment.
import subprocess
import sys

try:
    subprocess.check_call(
        [
            sys.executable, "-m", "pip", "install",
            "gradio", "openai-whisper", "pillow", "pytesseract", "gtts",
        ]
    )
except (subprocess.CalledProcessError, OSError) as e:
    # Best effort: report the failure and let the notebook keep running.
    print("Error installing dependencies:", e)

In [19]:
!apt-get install tesseract-ocr -y
!pip install pytesseract

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  tesseract-ocr-eng tesseract-ocr-osd
The following NEW packages will be installed:
  tesseract-ocr tesseract-ocr-eng tesseract-ocr-osd
0 upgraded, 3 newly installed, 0 to remove and 20 not upgraded.
Need to get 4,816 kB of archives.
After this operation, 15.6 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr-eng all 1:4.00~git30-7274cfa-1.1 [1,591 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr-osd all 1:4.00~git30-7274cfa-1.1 [2,990 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr amd64 4.1.1-2.1build1 [236 kB]
Fetched 4,816 kB in 2s (2,869 kB/s)
Selecting previously unselected package tesseract-ocr-eng.
(Reading database ... 124926 files and directories currently installed.)
Preparing to unpack .../tesseract-ocr-

In [17]:
!pip install opencv-python



In [22]:
import gradio as gr
import whisper
from PIL import Image
import pytesseract
from gtts import gTTS
import os
import cv2
import matplotlib.pyplot as plt


# Whisper models already loaded in this kernel, keyed by model name, so the
# model is loaded once instead of on every transcription request.
_WHISPER_MODELS = {}


def transcribe_audio(file):
    """Transcribe an audio file with Whisper's "small" model.

    The transcription is also written to ``transcription.txt`` in the current
    working directory, overwriting any previous content.

    Args:
        file: Path to the audio file, as produced by
            ``gr.Audio(type="filepath")``. ``None`` or an empty string means
            that nothing was uploaded.

    Returns:
        The transcribed text, or a short message when no file was provided.
    """
    if not file:
        return "No audio file provided."

    model = _WHISPER_MODELS.get("small")
    if model is None:
        model = _WHISPER_MODELS["small"] = whisper.load_model("small")

    text = model.transcribe(file)["text"]

    # Explicit UTF-8: transcripts may contain non-ASCII characters, and the
    # platform default encoding is not guaranteed to be able to encode them.
    with open("transcription.txt", "w", encoding="utf-8") as f:
        f.write(text)

    return text

def img_to_text(img):
    """Run Tesseract OCR on an image file and return the recognised text.

    Args:
        img: Path (or any object accepted by ``Image.open``) of the image.

    Returns:
        The OCR output when it contains anything other than whitespace,
        ``"No text found in the image."`` when it does not, or an
        ``"Error processing image: ..."`` message if any step raised.
    """
    try:
        with Image.open(img) as opened:
            extracted = pytesseract.image_to_string(opened)
        if not extracted.strip():
            return "No text found in the image."
        return extracted
    except Exception as err:
        return f"Error processing image: {err}"

def img_readable(img_path):
    """Binarise an image and save the result as ``processed_image.jpg``.

    The image is loaded with OpenCV, converted to grayscale, resized to a
    fixed 600x950 (width x height) and passed through a Gaussian adaptive
    threshold. The output file is written to the current working directory
    and overwritten on every call.

    Args:
        img_path: Path of the image to process, as produced by
            ``gr.Image(type="filepath")``.

    Returns:
        The path of the written image (``"processed_image.jpg"``).

    Raises:
        ValueError: If no path was given or the file could not be read as an
            image.
        OSError: If the processed image could not be written.
    """
    if not img_path:
        raise ValueError("No image provided.")

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise ValueError(f"Could not read image: {img_path}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = cv2.resize(gray, (600, 950))  # dsize is (width, height)
    adap = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )

    # Save processed image
    output_path = "processed_image.jpg"
    if not cv2.imwrite(output_path, adap):
        raise OSError(f"Could not write processed image to {output_path}")

    return output_path

def text_to_speech(text):
    """Synthesise ``text`` as English speech and save it as an MP3.

    The audio is produced with gTTS (``lang="en"``, ``tld="co.uk"``, normal
    speed) and written to ``output.mp3`` in the current working directory.

    Args:
        text: The text to speak.

    Returns:
        The path of the saved audio file (``"output.mp3"``).
    """
    audio_path = "output.mp3"
    speech = gTTS(text=text, lang="en", slow=False, tld="co.uk")
    speech.save(audio_path)
    return audio_path

# One Gradio interface per tool; each wraps one of the functions defined
# above and exchanges files with it by path.

# Speech -> text (Whisper).
iface_audio = gr.Interface(
    title="Audio Transcription",
    description="Upload an audio file to transcribe it using Whisper.",
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath", label="Upload Audio File"),
    outputs=gr.Textbox(label="Transcription"),
)

# Image -> text (Tesseract OCR).
iface_img = gr.Interface(
    title="Image to Text Converter",
    description="Upload an image to extract text using Tesseract OCR.",
    fn=img_to_text,
    inputs=gr.Image(type="filepath", label="Upload Image"),
    outputs=gr.Textbox(label="Extracted Text"),
)

# Image -> thresholded image (OpenCV).
iface_img_readable = gr.Interface(
    title="Image Preprocessing",
    description="Upload an image to apply preprocessing techniques.",
    fn=img_readable,
    inputs=gr.Image(type="filepath", label="Upload Image for Preprocessing"),
    outputs=gr.Image(type="filepath", label="Processed Image"),
)

# Text -> speech (gTTS).
iface_tts = gr.Interface(
    title="Text to Speech Converter",
    description="Enter text and convert it to speech using Google TTS.",
    fn=text_to_speech,
    inputs=gr.Textbox(label="Enter text"),
    outputs=gr.Audio(label="Generated Speech"),
)

# Combine the four tools into a single tabbed app; the tab names are listed
# in the same order as the interfaces.
app = gr.TabbedInterface(
    [iface_audio, iface_img, iface_img_readable, iface_tts],
    ["Audio Transcription", "Image to Text", "Image Preprocessing", "Text to Speech"],
)

if __name__ == "__main__":
    # share=True additionally serves the app on a temporary public URL.
    app.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()


* Running on public URL: https://554afec76e2cc88d4a.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
