<a href="https://colab.research.google.com/github/mahin632/Speech-Text/blob/main/Speech_Text.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
# Install a headless Java 17 runtime (required for LanguageTool).
# -qq plus the redirect to /dev/null keeps apt's output out of the notebook.
!apt-get install openjdk-17-jre-headless -qq > /dev/null

# Register the Java 17 binary as an alternative for /usr/bin/java (priority 1),
# then select it explicitly so `java` resolves to it.
!update-alternatives --install /usr/bin/java java /usr/lib/jvm/java-17-openjdk-amd64/bin/java 1
!update-alternatives --set java /usr/lib/jvm/java-17-openjdk-amd64/bin/java

# Verify which Java version is now active
!java -version

update-alternatives: using /usr/lib/jvm/java-11-openjdk-amd64/bin/java to provide /usr/bin/java (java) in auto mode
update-alternatives: using /usr/lib/jvm/java-17-openjdk-amd64/bin/java to provide /usr/bin/java (java) in manual mode
openjdk version "17.0.16" 2025-07-15
OpenJDK Runtime Environment (build 17.0.16+8-Ubuntu-0ubuntu122.04.1)
OpenJDK 64-Bit Server VM (build 17.0.16+8-Ubuntu-0ubuntu122.04.1, mixed mode, sharing)


In [9]:
# Install the third-party packages imported below:
# SpeechRecognition (speech_recognition), language-tool-python
# (language_tool_python) and pydub. Versions are not pinned.
!pip install SpeechRecognition language-tool-python pydub

import speech_recognition as sr
import language_tool_python
from pydub import AudioSegment
from google.colab import files
from IPython.display import display, Markdown

# ---- Upload and Convert ----
def upload_audio():
    """Ask the user to upload a file and return the first uploaded name.

    Shows a prompt, opens the Colab upload widget via ``files.upload()``,
    and returns the first key of the resulting mapping.

    Returns:
        The first filename key reported by the upload, or ``None`` when the
        upload mapping is empty.
    """
    print("Please upload an audio file (.wav or .mp3)...")
    uploaded_files = files.upload()
    # Only the first uploaded entry is used; any others are ignored.
    return next(iter(uploaded_files.keys()), None)

def convert_to_wav(file_path):
    """Return the path of a WAV file for ``file_path``, converting MP3 input.

    Paths that do not end in ``.mp3`` (compared case-insensitively) are
    returned unchanged. MP3 paths are decoded with
    ``AudioSegment.from_mp3`` and exported as WAV next to the original.

    Args:
        file_path: Path of the uploaded audio file.

    Returns:
        ``file_path`` itself when no conversion is needed, otherwise the path
        of the newly written ``.wav`` file.
    """
    mp3_suffix = ".mp3"
    # Compare only the trailing extension, ignoring case, so "CLIP.MP3" is
    # converted as well.
    if file_path[-len(mp3_suffix):].lower() != mp3_suffix:
        return file_path

    # Swap just the final extension; str.replace would also rewrite any
    # earlier ".mp3" inside the name (e.g. "a.mp3.take2.mp3").
    wav_path = file_path[:-len(mp3_suffix)] + ".wav"
    sound = AudioSegment.from_mp3(file_path)
    sound.export(wav_path, format="wav")
    return wav_path

# ---- Transcription ----
def transcribe_audio_file(file_path):
    """Transcribe an audio file using ``Recognizer.recognize_google``.

    Args:
        file_path: Path passed to ``sr.AudioFile``.

    Returns:
        The recognized text on success. On ``sr.UnknownValueError`` or
        ``sr.RequestError`` a fixed human-readable message string is returned
        instead of raising; callers detect failure by inspecting that text.
    """
    engine = sr.Recognizer()
    with sr.AudioFile(file_path) as wav_source:
        print(" Processing audio file...")
        # Read the whole source into memory before recognition.
        captured = engine.record(wav_source)

    try:
        transcript = engine.recognize_google(captured)
    except sr.UnknownValueError:
        transcript = " Could not understand audio"
    except sr.RequestError:
        transcript = " API error or no internet connection"
    return transcript

# ---- Text Formatting ----
def fix_text_formatting(text):
    """Tidy a transcript into a single capitalized, punctuated sentence.

    Surrounding whitespace is stripped, the first character is upper-cased
    (the rest is left untouched), and a trailing period is appended unless
    the text already ends in ``.``, ``!`` or ``?``.

    Args:
        text: Raw transcript string.

    Returns:
        The formatted string, or ``""`` when the input is empty or only
        whitespace.
    """
    cleaned = text.strip()
    if not cleaned:
        return ""

    sentence = cleaned[0].upper() + cleaned[1:]
    if not sentence.endswith((".", "!", "?")):
        sentence += "."
    return sentence

# ---- Grammar Analysis ----
def analyze_text(text):
    """Run a grammar check on ``text`` and derive a simple accuracy score.

    The score is ``100 - (errors / words) * 100``, clamped so it never drops
    below zero. Words are counted by splitting on whitespace. Text with no
    words scores 0.

    Args:
        text: The sentence(s) to check.

    Returns:
        A ``(grammar_errors, accuracy_score)`` tuple: the list of matches
        returned by ``LanguageTool.check`` and the score as a percentage.
    """
    tool = language_tool_python.LanguageTool('en-US')
    try:
        grammar_errors = tool.check(text)
    finally:
        # Always release the checker, even if the check raises; otherwise
        # every call leaves its LanguageTool backend running.
        tool.close()

    num_errors = len(grammar_errors)
    total_words = len(text.split())

    if total_words == 0:
        # Avoid dividing by zero; an empty transcript gets the lowest score.
        accuracy_score = 0.0
    else:
        error_penalty = (num_errors / total_words) * 100
        # More errors than words would go negative, so clamp at zero.
        accuracy_score = max(0.0, 100 - error_penalty)

    return grammar_errors, accuracy_score

# Interactive Execution

# Pipeline: upload -> convert to WAV -> transcribe -> format -> grammar report.
file_name = upload_audio()
if file_name:
    file_path = convert_to_wav(file_name)
    transcribed_text = transcribe_audio_file(file_path)

    display(Markdown("###  Original Transcription (Raw):"))
    display(Markdown(f"> {transcribed_text}"))

    # transcribe_audio_file reports failures as message strings, so a failed
    # run is detected by looking for those phrases in the returned text.
    failure_markers = ("Could not understand", "API error")
    if any(marker in transcribed_text for marker in failure_markers):
        display(Markdown(" **Please try again with a clearer voice or another file.**"))
    else:
        formatted_text = fix_text_formatting(transcribed_text)

        display(Markdown("###  Formatted Transcription:"))
        display(Markdown(f"> {formatted_text}"))

        grammar_mistakes, accuracy_score = analyze_text(formatted_text)
        num_errors = len(grammar_mistakes)

        display(Markdown(f"###  Grammar Mistakes Found: **{num_errors}**"))
        if num_errors > 0:
            # One bullet per mistake, with the suggested replacements nested
            # underneath as a sub-bullet.
            bullet_lines = []
            for mistake in grammar_mistakes:
                suggestions = ', '.join(mistake.replacements)
                bullet_lines.append(
                    f"- **Incorrect:** {mistake.context}\n  - **Suggestion:** {suggestions}\n"
                )
            mistake_list = "".join(bullet_lines)
            display(Markdown(mistake_list))

        display(Markdown(f"###  Final Accuracy Score: **{accuracy_score:.2f}%**"))


Please upload an audio file (.wav or .mp3)...


Saving speech.wav to speech (4).wav
 Processing audio file...


###  Original Transcription (Raw):

> Python Programming is the best of all by

###  Formatted Transcription:

> Python Programming is the best of all by.

###  Grammar Mistakes Found: **0**

###  Final Accuracy Score: **100.00%**