You can create a program in Python that converts audio into text using speech recognition. Here's how to do it:

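    Install the necessary libraries:
        Install the SpeechRecognition library (pip install SpeechRecognition) for recognizing speech.
        Install PyAudio (pip install PyAudio) to capture microphone input (if you want to use real-time audio input).
        Install pyttsx3 (pip install pyttsx3) for the text-to-speech examples further down.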

In [50]:
import speech_recognition as sr

# Initialize recognizer
recognizer = sr.Recognizer()

# Capture audio from the microphone
with sr.Microphone() as source:
    print("Please say something...")
    # Let the recognizer adjust for ambient noise before it starts listening
    recognizer.adjust_for_ambient_noise(source)
    audio = recognizer.listen(source)

# Use Google Web Speech API to recognize the audio.
# Only the recognition call sits inside the try; the "thinks you said" header
# is printed in the else branch so it never appears above a failure message.
try:
    text = recognizer.recognize_google(audio)
except sr.UnknownValueError:
    print("Google Speech Recognition could not understand the audio")
except sr.RequestError:
    print("Could not request results from Google Speech Recognition service")
else:
    print("Google Speech Recognition thinks you said:")
    print(text)


Please say something...
Google Speech Recognition thinks you said:
python


## Audio to text

In [62]:
import speech_recognition as sr

def audio_to_text():
    """Listen on the microphone for one phrase and transcribe it.

    Progress and the outcome are printed to stdout. The audio is transcribed
    with ``Recognizer.recognize_google``.

    Returns:
        The recognized text, or ``None`` when the audio could not be
        understood (``sr.UnknownValueError``) or the recognition request
        failed (``sr.RequestError``).
    """
    # Initialize recognizer
    recognizer = sr.Recognizer()

    # Use microphone as the source
    with sr.Microphone() as source:
        print("Please say something...")
        recognizer.adjust_for_ambient_noise(source)  # Adjust for ambient noise
        audio = recognizer.listen(source)

    print("Recognizing...")
    try:
        # Use Google Web Speech API to convert audio to text
        text = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        print("Sorry, I could not understand the audio.")
        return None
    except sr.RequestError:
        print("Could not request results from Google Speech Recognition service.")
        return None

    print(f"Text: {text}")
    # Hand the transcript back so callers can use it, not just read it on screen
    return text

# Run one listen-and-transcribe pass when this cell/module is executed directly
if __name__ == "__main__":
    audio_to_text()


Please say something...
Recognizing...
Text: hello I am under the water please save me


## Text to audio

In [64]:
import pyttsx3

def text_to_speech(text):
    """Speak *text* aloud using a pyttsx3 engine.

    The engine is configured with a rate of 150 and a volume of 1, and the
    second installed voice is selected when one exists.

    Args:
        text: The string to be spoken.
    """
    # Initialize the pyttsx3 engine
    engine = pyttsx3.init()

    # Set properties (optional, you can adjust voice rate and volume)
    engine.setProperty('rate', 150)  # Speed of speech (higher is faster)
    engine.setProperty('volume', 1)  # Volume level (0.0 to 1.0)

    # Switch to the second installed voice only if there is one; otherwise keep
    # the engine's current voice instead of failing with IndexError.
    # NOTE(review): the original comment assumed index 0 is a male voice and
    # index 1 a female voice -- that depends on the voices installed; verify.
    voices = engine.getProperty('voices')
    if len(voices) > 1:
        engine.setProperty('voice', voices[1].id)

    # Convert text to speech
    engine.say(text)

    # Wait until speech is finished
    engine.runAndWait()

# Demo entry point: speak a fixed greeting when executed directly
if __name__ == "__main__":
    text = "Hello, how are you today?"  # sample sentence passed to the engine
    text_to_speech(text)


## Speech to text -> text to speech

In [72]:
import speech_recognition as sr
import pyttsx3

def audio_to_text():
    """Capture one phrase from the microphone and return its transcript.

    Status messages and the result are printed along the way.

    Returns:
        The text produced by ``recognize_google``, or ``None`` if the audio
        was not understood or the recognition request failed.
    """
    listener = sr.Recognizer()

    # Record a phrase from the microphone after adjusting for ambient noise
    with sr.Microphone() as mic:
        print("Please say something...")
        listener.adjust_for_ambient_noise(mic)
        captured = listener.listen(mic)

    print("Recognizing...")
    try:
        # Google Web Speech API turns the captured audio into text
        transcript = listener.recognize_google(captured)
    except sr.UnknownValueError:
        print("Sorry, I could not understand the audio.")
        return None
    except sr.RequestError:
        print("Could not request results from Google Speech Recognition service.")
        return None

    print(f"Text: {transcript}")
    return transcript

def text_to_speech(text):
    """Speak *text* aloud using a pyttsx3 engine.

    The speaking rate is lowered by 50 from the engine's current rate, the
    volume is set to 1, and the first installed voice is selected when the
    engine reports any voices.

    Args:
        text: The string to be spoken.
    """
    # Initialize the TTS engine
    engine = pyttsx3.init()

    # Set properties (optional)
    rate = engine.getProperty('rate')  # Get the current speaking rate
    engine.setProperty('rate', rate - 50)  # Adjust speed (optional)
    engine.setProperty('volume', 1)  # Set volume to maximum (optional)

    # Pick the first installed voice, skipping the selection when the engine
    # reports no voices so an empty list does not raise IndexError.
    # NOTE(review): the original comment assumed index 0 is a male voice and
    # index 1 a female voice -- that depends on the voices installed; verify.
    voices = engine.getProperty('voices')
    if voices:
        engine.setProperty('voice', voices[0].id)

    # Speak the text and wait for the engine to finish
    engine.say(text)
    engine.runAndWait()

# Round trip when executed directly: transcribe speech, then read it back
if __name__ == "__main__":
    # Convert audio to text (None when recognition failed)
    recognized_text = audio_to_text()
    
    # Speak only when a non-empty transcript came back
    if recognized_text:
        text_to_speech(recognized_text)


Please say something...
Recognizing...
Text: hello I am under the water please say me
