In [1]:
import re
import webbrowser
from urllib.parse import quote_plus

import pyaudio
import speech_recognition as sr # pip install SpeechRecognition

In [2]:
def record_audio(filename, duration=5):
    """Capture one utterance from the default microphone and save it as WAV.

    Args:
        filename: Path of the WAV file to create (overwritten if it exists).
        duration: Forwarded to ``Recognizer.listen`` as ``phrase_time_limit``.
            NOTE(review): per the SpeechRecognition docs this caps the length
            of the captured phrase; it is not a fixed recording length.

    Returns:
        The ``filename`` that was written, unchanged.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as microphone:
        print("Say something!")
        # Calibrate against background noise before listening.
        recognizer.adjust_for_ambient_noise(microphone)
        captured = recognizer.listen(microphone, phrase_time_limit=duration)

    with open(filename, "wb") as wav_file:
        wav_file.write(captured.get_wav_data())

    print(f"Audio recorded as {filename}")
    return filename

In [3]:
def audio_to_text(filename):
    """Transcribe an audio file with Google Speech Recognition (pt-BR).

    Args:
        filename: Path of the audio file to read via ``sr.AudioFile``.

    Returns:
        The recognized text, or ``None`` when the audio could not be
        understood or the recognition request failed (a message is printed
        in both failure cases).
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(filename) as audio_source:
        recording = recognizer.record(audio_source)

    try:
        transcript = recognizer.recognize_google(recording, language='pt-BR')
    except sr.UnknownValueError:
        print("Google Speech Recognition could not understand audio.")
        return None
    except sr.RequestError as e:
        print(f"Could not request results from Google Speech Recognition service; {e}")
        return None

    print(f"Recognized text: {transcript}")
    return transcript

In [4]:
# Maps each intent name to the lowercase trigger words that select it.
# Insertion order matters: intents and keywords are tried in this order.
INTENTS_AND_KEYWORDS = dict(
    search=["pesquisar", "buscar", "procurar", "encontrar"],
    play_media=["tocar", "ouvir", "reproduzir"],
)

In [5]:
def extract_intent_and_object(text, intents=None):
    """Derive an intent and its object from a recognized command.

    The text is lowercased and scanned for trigger keywords in table order.
    Keywords are matched as whole words only, so a keyword embedded in a
    longer word (e.g. "tocar" inside "retocar") does not trigger an intent
    and is not cut out of the object.

    Args:
        text: The recognized command. ``None`` or an empty string yields the
            "unknown" result instead of raising.
        intents: Optional mapping of intent name -> list of lowercase trigger
            keywords. Defaults to the module-level ``INTENTS_AND_KEYWORDS``.

    Returns:
        A dict ``{"intent": <name or "unknown">, "object": <str or None>}``.
        ``object`` is the text with the keyword removed, punctuation dropped
        and whitespace collapsed; it is ``None`` when nothing remains.
    """
    if intents is None:
        intents = INTENTS_AND_KEYWORDS

    identified_intent = "unknown"
    if not text:
        return {"intent": identified_intent, "object": None}

    lowered = text.lower()
    for intent_name, keywords in intents.items():
        for keyword in keywords:
            # Lookarounds rather than \b so keywords that begin or end with a
            # non-word character still behave sensibly.
            pattern = re.compile(r"(?<!\w)" + re.escape(keyword) + r"(?!\w)")
            if not pattern.search(lowered):
                continue

            identified_intent = intent_name

            # Replace with a space so the neighbours of the removed keyword
            # are never glued together; whitespace is normalized below.
            remainder = pattern.sub(" ", lowered)
            kept = "".join(
                ch for ch in remainder if ch.isalnum() or ch.isspace()
            )
            cleaned_object = " ".join(kept.split())

            if cleaned_object:
                return {"intent": identified_intent, "object": cleaned_object}
            # Keyword found but nothing left over: keep the intent and keep
            # scanning in case another keyword yields a usable object.

    return {"intent": identified_intent, "object": None}

In [6]:
def _open_query_url(url_prefix, query, failure_label):
    """Open ``url_prefix`` + percent-encoded ``query`` in the default browser."""
    try:
        # quote_plus encodes spaces, non-ASCII characters and reserved
        # characters such as '&' and '#', which would otherwise truncate or
        # corrupt the query string.
        webbrowser.open(url_prefix + quote_plus(query))
    except Exception as e:
        print(f"{failure_label}: {e}")


def execute_action(nlp_result):
    """Carry out the action described by an intent/object result.

    Args:
        nlp_result: Dict with keys ``"intent"`` and ``"object"``, as returned
            by ``extract_intent_and_object``.

    Behavior:
        * ``"search"``: opens a Google search for the object.
        * ``"play_media"``: opens YouTube search results for the object.
        * any other intent: prints "Command not understood."
        When the object is empty/None a message is printed and no browser
        is opened. Nothing is returned.
    """
    intent = nlp_result["intent"]
    obj = nlp_result["object"]

    if intent == "search":
        if not obj:
            print("I don't know what to search for.")
            return
        print(f"Executing search on Google for: '{obj}'")
        _open_query_url(
            "https://www.google.com/search?q=", obj, "Could not open browser"
        )

    elif intent == "play_media":
        if not obj:
            print("I don't know what to play.")
            return
        print(f"Executing playback on YouTube for: '{obj}'")
        _open_query_url(
            "https://www.youtube.com/results?search_query=",
            obj,
            "Could not open Youtube",
        )

    else:
        print("Command not understood.")

In [7]:
# Record one spoken command from the microphone into voice_command.wav;
# `duration` is forwarded to the recognizer as the phrase time limit.
audio_file = record_audio("voice_command.wav", duration=8)

Say something!
Audio recorded as voice_command.wav


In [8]:
# Transcribe the file produced by the recording cell. Using `audio_file`
# (the path returned by record_audio) instead of repeating the literal keeps
# the two cells in sync if the filename is ever changed.
command_text = audio_to_text(audio_file)

Recognized text: buscar farmácia mais próxima


In [9]:
# Interpret and act only when transcription produced text; audio_to_text
# returns None (after printing the reason) when recognition fails.
if command_text:
    nlp_result = extract_intent_and_object(command_text)
    print(f"Interpreted: {nlp_result}")
    execute_action(nlp_result)

Interpreted: {'intent': 'search', 'object': 'farmácia mais próxima'}
Executing search on Google for: 'farmácia mais próxima'
