# ***RUN THESE BEFORE RUNNING THE CODE***

In [None]:
! nvidia-smi

In [None]:
!pip install openai==0.28.0
!pip install git+https://github.com/openai/whisper.git -q
!pip install SpeechRecognition
!pip install openai-whisper language-tool-python
!pip install noisereduce
!apt-get update && apt-get install -y portaudio19-dev python3-dev python-dev ffmpeg libav-tools

# ***MAIN CODE***

In [None]:
# Speech-to-Text with Whisper, Noise Reduction, and Grammar Correction

import whisper
import re
import language_tool_python
import numpy as np
import librosa
from noisereduce import reduce_noise
import soundfile as sf
import os
import subprocess



# Function to convert an audio file to WAV format
def convert_to_wav(input_audio_path, output_audio_path):

    """
    Ensure the audio is available as a WAV file, converting it with ffmpeg if needed.

    The decision is based purely on the file extension (case-insensitive).
    Files that already end in ``.wav`` are returned untouched, so they are NOT
    resampled; only converted files are written at a 16 kHz sampling rate.

    Parameters:
    - input_audio_path (str): Path to the input audio file.
    - output_audio_path (str): Path to save the converted WAV file.

    Returns:
    - str: Path to the WAV file (converted or original).

    Raises:
    - subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """

    if input_audio_path.lower().endswith('.wav'):
        print("Audio is already in WAV format. Proceeding without conversion.")
        return input_audio_path

    print("Converting audio to WAV format...")
    try:
        subprocess.run(
            [
                "ffmpeg",
                # Overwrite a stale output file instead of stopping at ffmpeg's
                # interactive "Overwrite? [y/N]" prompt, which cannot be
                # answered from a notebook or a non-interactive run.
                "-y",
                "-i", input_audio_path,
                "-ar", "16000",
                output_audio_path,
            ],
            check=True
        )
    except subprocess.CalledProcessError as e:
        print(f"Error during audio conversion: {e}")
        raise
    print(f"Audio converted to WAV format: {output_audio_path}")
    return output_audio_path





# Function to reduce noise in an audio file
def denoise_audio(input_audio_path, output_audio_path):

    """
    Load an audio file, run noise reduction on it, and write the result.

    The audio is loaded at its native sampling rate and the first two
    seconds are handed to the noise reducer as the noise sample.

    Parameters:
    - input_audio_path (str): Path to the input audio file.
    - output_audio_path (str): Path to save the denoised audio.

    Any exception is reported on stdout and then re-raised to the caller.
    """

    print("Performing noise reduction...")
    try:
        samples, sample_rate = librosa.load(input_audio_path, sr=None)

        # Assume the first 2 seconds of the recording contain only noise.
        noise_sample_count = sample_rate * 2
        leading_noise = samples[:noise_sample_count]

        # NOTE(review): recent noisereduce releases appear to use `y_noise`
        # only when `stationary=True` is passed - confirm the noise sample
        # is actually taken into account here.
        cleaned = reduce_noise(y=samples, sr=sample_rate, y_noise=leading_noise)

        sf.write(output_audio_path, cleaned, sample_rate)
        print("Noise reduction complete.")
    except Exception as err:
        print(f"Error during noise reduction: {err}")
        raise





# Function to split audio into smaller segments
def split_audio(input_audio_path, segment_duration=300):

    """
    Splits the audio file into consecutive segments of the specified duration.

    Segments start on multiples of ``segment_duration`` seconds and are written
    to the current working directory as ``segment_1.wav``, ``segment_2.wav``, ...
    The final segment always runs to the very end of the recording, so a
    trailing fraction of a second is kept rather than dropped, and a clip
    shorter than one second still yields a single segment.

    Parameters:
    - input_audio_path (str): Path to the input audio file.
    - segment_duration (int): Duration of each segment in seconds (default: 300).

    Returns:
    - list: List of paths to the audio segments (empty if the audio has no samples).

    Raises:
    - ValueError: If segment_duration is not positive.
    """

    if segment_duration <= 0:
        raise ValueError("segment_duration must be a positive number of seconds")

    print("Splitting audio into smaller segments...")
    audio, sr = librosa.load(input_audio_path, sr=None)
    total_samples = len(audio)
    whole_seconds = total_samples // sr

    # Segment start times, in whole seconds.
    starts = list(range(0, whole_seconds, segment_duration))
    if not starts and total_samples:
        # Sub-second clip: still emit it as one segment instead of nothing.
        starts = [0]

    segments = []
    for index, start in enumerate(starts, start=1):
        if index == len(starts):
            # Last segment: extend to the final sample so the fractional
            # tail of the recording is not lost.
            stop_sample = total_samples
        else:
            stop_sample = (start + segment_duration) * sr
        segment_path = f"segment_{index}.wav"
        sf.write(segment_path, audio[start * sr:stop_sample], sr)
        segments.append(segment_path)

    print(f"Audio split into {len(segments)} segments.")
    return segments




# Function to process transcription for special commands
def process_transcription(transcription):

    """
    Replaces spoken formatting commands with the punctuation they stand for.

    Recognised commands (matched case-insensitively, as whole words):
    - "full stop" (and the common mis-hearing "pull stop") -> "."
    - "next para" / "next paragraph" -> newline
    - "comma" -> ",", "semicolon" -> ";", "colon" -> ":"

    Spaces/tabs in front of a command are removed together with it, so the
    punctuation attaches to the preceding word ("hello comma world" becomes
    "hello, world" rather than "hello , world"). For paragraph breaks the
    spaces/tabs after the command are removed as well, so the new paragraph
    does not start with a stray space.

    Parameters:
    - transcription (str): Raw transcription text.

    Returns:
    - str: Processed transcription.
    """

    commands = (
        (r"full stop|pull stop", "."),
        (r"next paragraph|next para", "\n"),
        (r"comma", ","),
        (r"semicolon", ";"),
        (r"colon", ":"),
    )
    for phrases, symbol in commands:
        # Only spaces/tabs are swallowed (not "\s") so that paragraph breaks
        # inserted by an earlier substitution are never eaten.
        trailing = r"[ \t]*" if symbol == "\n" else ""
        pattern = rf"[ \t]*\b(?:{phrases})\b{trailing}"
        transcription = re.sub(pattern, symbol, transcription, flags=re.IGNORECASE)
    return transcription






# Function to correct grammar in transcription
def correct_grammar(transcription):

    """
    Uses LanguageTool (en-US) to correct grammatical errors in the transcription.

    The LanguageTool instance is created on first use and then reused for
    every later call, instead of being rebuilt for each piece of text.
    Blank input is returned unchanged without consulting LanguageTool.

    Parameters:
    - transcription (str): Text to correct.

    Returns:
    - str: Corrected transcription.
    """

    if not transcription.strip():
        return transcription

    # Cache the checker on the function object so repeated calls (one per
    # audio segment) share a single instance.
    tool = getattr(correct_grammar, "_tool", None)
    if tool is None:
        tool = language_tool_python.LanguageTool("en-US")
        correct_grammar._tool = tool

    matches = tool.check(transcription)
    corrected_text = language_tool_python.utils.correct(transcription, matches)
    return corrected_text




# Main function to execute the workflow
def main():

    """
    Main workflow to process audio and generate a grammatically correct transcription.

    Steps: load the Whisper "medium" model, ask for an audio path, convert the
    audio to WAV, denoise it, split it into 300-second segments, transcribe
    each segment, apply voice-command and grammar post-processing, and write
    the combined text to ``final_transcription_output.txt``.

    Errors are reported on stdout rather than propagated. Intermediate files
    are removed whether or not the run succeeds.
    """

    # Imported locally: only this entry point needs a scratch directory.
    import tempfile

    print("Loading Whisper model...")
    model = whisper.load_model("medium")

    audio_file = input("Enter the path to your audio file: ").strip()
    segments = []

    # Intermediate files live in a private scratch directory so they can never
    # overwrite or delete a user file that happens to be called
    # "temp_audio.wav" or "denoised_audio.wav". The directory and its
    # contents are removed automatically when the block exits.
    with tempfile.TemporaryDirectory() as work_dir:
        temp_wav_file = os.path.join(work_dir, "temp_audio.wav")
        denoised_audio_file = os.path.join(work_dir, "denoised_audio.wav")

        try:
            # Step 1: Convert to WAV format
            audio_file = convert_to_wav(audio_file, temp_wav_file)

            # Step 2: Denoise the audio
            denoise_audio(audio_file, denoised_audio_file)

            # Step 3: Split the audio into segments
            segments = split_audio(denoised_audio_file, segment_duration=300)

            # Step 4: Transcribe and process each segment
            transcriptions = []

            for i, segment in enumerate(segments):
                print(f"Processing segment {i + 1}/{len(segments)}: {segment}")
                result = model.transcribe(segment)
                transcription = result["text"]
                print(f"Transcription for segment {i + 1}:")
                print(transcription)

                processed_transcription = process_transcription(transcription)
                final_transcription = correct_grammar(processed_transcription)
                transcriptions.append(final_transcription + "\n")

            combined_transcription = "".join(transcriptions)

            # Step 5: Save the final transcription
            print("\nFinal Combined Transcription:")
            print(combined_transcription)

            output_file = "final_transcription_output.txt"
            # Explicit UTF-8 so non-ASCII characters in the transcript are
            # written correctly regardless of the platform default encoding.
            with open(output_file, "w", encoding="utf-8") as file:
                file.write(combined_transcription)

            print(f"\nFinal transcription saved to: {output_file}")

        except FileNotFoundError:
            print("Error: The specified file was not found. Please check the file path and try again.")
        except Exception as e:
            print(f"An error occurred: {e}")
        finally:
            # Clean up segment files even when a later step failed part-way
            # through the loop.
            for segment in segments:
                if os.path.exists(segment):
                    os.remove(segment)



# Run the workflow only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
