In [1]:
pip install fuzzywuzzy python-Levenshtein


Note: you may need to restart the kernel to use updated packages.


In [None]:
# Import necessary libraries
import speech_recognition as sr
from gtts import gTTS
import playsound
import nltk
from nltk.tokenize import word_tokenize
import csv
import os
import gradio as gr
from fuzzywuzzy import fuzz
from fuzzywuzzy import process

# Download necessary NLTK data
nltk.download('punkt')

In [None]:
# Load inventory from CSV
def load_inventory(file_path):
    inventory = {}
    with open(file_path, mode='r') as file:
        reader = csv.DictReader(file)
        # loop through each row in the csv
        for row in reader:
            # get product name from current row
            product = row["product"]
            inventory[product] = {
                "quantity": int(row["quantity"]), # convert quantity into an integer
                "volume": row["volume"], # keep volume as a string
                "price": float(row["price"]) # convert price into a float
            }
    return inventory

# Update the file path to the resources folder
csv_file_path = os.path.join('Resources', 'final_inventory.csv')
# load inventory into the function we just defined
inventory = load_inventory(csv_file_path)

In [None]:
# Create a function that will recognize when a person is speaking
def listen(audio_file_path):
    print(f"Received audio file path: {audio_file_path}") # for debugging
    # create a recognizer instance
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_file_path) as source:
            # record audio from the file
            audio = recognizer.record(source)
            # use Google's speech recognition to turn audio into text
            text = recognizer.recognize_google(audio)
            print(f"Recognized text: {text}") # for debugging
            # returns the transribed text
            return text
    except sr.UnknownValueError:
        # handles case where speech wasn't understood
        print("Error: Could not understand the audio")
        return "Sorry, I did not understand that."
    except sr.RequestError:
        # handles case where service might be down
        print("Error: Could not request results from Google Speech Recognition service.")
        return "Sorry, the service is down."
    except AssertionError as e:
        # handles case where the audio file isn't valid
        print(f"AssertionError: {e}")
        return "Sorry, the audio file is not valid."

In [None]:
# function to convert text to speech and play it
def speak(text):
    # default message if there is no text
    if not text:
        text = "Sorry, I have nothing to say."
    tts = gTTS(text=text, lang='en') # use gTTS to convert text to speech in English
    # define the filename for the audio file 
    filename = "response.mp3"
    # save the speech that was generated into an mp3 file
    tts.save(filename)
    try:
        # plays mp3 file
        playsound.playsound(filename)
        print(f"Playing sound: {filename}")
    except Exception as e:
        # handles any errors that might happen during playback
        print(f"Error playing sound: {e}")
    return filename

In [None]:
# function to process spoken command
def process_command(command):
    print(f"Processing command: {command}")
    # tokenize the command into individual words and convert into lowercase
    tokens = word_tokenize(command.lower())
    # extract the product name from the command
    product_name = extract_product_name(command)

    # check for a greeting 
    if "hello" in tokens or "Paxil" in tokens:
        response = "Hello, I am Paxil your voice assistant kitty. I am here to help you find information?"
    # check if the command is about stock    
    elif "check" in tokens and "stock" in tokens:
        if product_name in inventory:
            item = inventory[product_name]
            response = f"We have {item['quantity']} {product_name}(s) in stock."
        else:
            response = f"Sorry, we don't have {product_name} in our inventory."
    # check if the command is about price
    elif "price" in tokens:
        if product_name in inventory:
            response = f"The price of {product_name} is ${inventory[product_name]['price']}."
        else:
            response = f"Sorry, we don't have {product_name} in our inventory."
    # check if the command is about volume        
    elif "volume" in tokens:
        if product_name in inventory:
            response = f"The volume of {product_name} is {inventory[product_name]['volume']}."
        else:
            response = f"Sorry, we don't have {product_name} in our inventory."
    # check if the command is a thank you or a goodbye
    elif "thank you" in tokens or "goodbye" in tokens:
        response = "You're welcome, please proceed to check out."
    # if the command doesn't match any pattern    
    else:
        response = "I'm sorry, I didn't understand that. Can you please repeat?"
    print(f"Response: {response}")
    return response

In [None]:
# function to extract the product name from the command using fuzzy matching (library that was used)
def extract_product_name(command):
    # converts command to lower case for case-insensitive comparisons
    command = command.lower()
    # use fuzzy matching to find the best match for the command in the inventory keys
    best_match, best_score = process.extractOne(command, inventory.keys(), scorer=fuzz.partial_ratio)
    # check if the best score is above the threshold (80 in this case)
    if best_score > 80:  # Adjust the threshold as needed (works well enough!)
        # print the best match and its score for debugging
        print(f"Best match: {best_match} with score {best_score}")  # Debug statement
        # return the best match as the product name
        return best_match
    else:
        print(f"No suitable match found for command: {command}")  # Debug statement
        return "" # return an empty string if no match is found

In [None]:
# function to act as the main assistant that processes audio commands
def assistant(audio_file_path):
    print(f"Assistant received audio file path: {audio_file_path}") # debug statement
    # use the listen function to convert audio to text
    command = listen(audio_file_path)
    # process text command and generate a response
    response = process_command(command)
    # convert the response text to speech and play it
    audio_response = speak(response)
    return response, audio_response # returns the text response and path to audio file

In [2]:

# Create Gradio interface
iface = gr.Interface(
    # set the function to be called when an audio input is given
    fn=assistant,
    # define the input type: audio from the mic, saved as a file
    inputs=gr.Audio("microphone", type="filepath"),
    # define the outputs: a text box for transcribed text and an audio player for the response
    outputs=[gr.Textbox(), gr.Audio(type="filepath")]
)
# launches gradio interface and makes it shareable with a public URL
iface.launch(share=True)


  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/stevenhill/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Running on local URL:  http://127.0.0.1:7863
Running on public URL: https://15be218af332910910.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




Assistant received audio file path: /private/var/folders/xw/v1jvfgnx0qd3b1ytj3y03tk00000gn/T/gradio/a3c90b72f37f757f5c8ae67ea61f783b7f85736e/audio.wav
Received audio file path: /private/var/folders/xw/v1jvfgnx0qd3b1ytj3y03tk00000gn/T/gradio/a3c90b72f37f757f5c8ae67ea61f783b7f85736e/audio.wav
Recognized text: hello Paxil
Processing command: hello Paxil
No suitable match found for command: hello paxil
Response: Hello, I am Paxil your voice assistant kitty. I am here to help you find information?
Playing sound: response.mp3
Assistant received audio file path: /private/var/folders/xw/v1jvfgnx0qd3b1ytj3y03tk00000gn/T/gradio/133f83b7566aa6eec7c450cc11f7ee7ed52d7a2d/audio.wav
Received audio file path: /private/var/folders/xw/v1jvfgnx0qd3b1ytj3y03tk00000gn/T/gradio/133f83b7566aa6eec7c450cc11f7ee7ed52d7a2d/audio.wav
Recognized text: what is the price of Final Touch flat
Processing command: what is the price of Final Touch flat
Best match: FINAL TOUCH FLAT-WHITE with score 84
Response: The price 