# 1. Parse Menu

In [2]:
import json

with open('menu.json') as f:
  data = json.load(f)
menu_data = []

for menu_category, menu_item_dict in data.items():
    for menu_item, menu_item_info in menu_item_dict.items():
        if menu_category != 'Menus' and isinstance(menu_item_info, list):
            nutritional_info = {}
            item_id = menu_item
            if menu_item_info[2].keys() == {'nutritionalInfo', 'available'}:
                nutritional_info = menu_item_info[2]['nutritionalInfo']
                nutritional_info.pop('itemId')
                available = menu_item_info[2]['available']
                
            elif menu_item_info[2].keys() == {'available'}:
                available = menu_item_info[2]['available']
                
            else:
                pass
            
            allergens = nutritional_info.get('allergens', [])
            nutritional_info.pop('allergens', None)
            result = {
                "itemId": item_id,
                "category": menu_category,
                "name": menu_item_info[0],
                "price": float(menu_item_info[1]),
                "soy" : (True if 'soy' in allergens else False),
                "lactose" : (True if ("dairy" in allergens) or ("whey" in allergens) else False),
                "gluten" : (True if 'wheat' in allergens else False),
                "nutritionalInfo": nutritional_info,
                "contents": [],
                "available": bool(available)
            }
            print(result)
            menu_data.append(result)
            
        elif menu_category == 'Menus':
            item_id = menu_item
            name = menu_item_info['name']
            price = menu_item_info['price']
            contents = menu_item_info['contents']
            
            new_contents = []
            for item in contents:
                if isinstance(item, list):
                    _item_id = item[0]
                    _qty = item[1]
                    new_contents.append({
                        "itemId": _item_id,
                        "quantity": _qty
                    })
                
                elif isinstance(item, dict):
                    new_contents.append(item)
                else:
                    continue
                
            allergens = nutritional_info.get('allergens', [])
            nutritional_info.pop('allergens', None)
            result = {
                "itemId": item_id,
                "category": menu_category,
                "name": name,
                "price": price,
                "soy" : (True if 'soy' in allergens else False),
                "lactose" : (True if ("dairy" in allergens) or ("whey" in allergens) else False),
                "gluten" : (True if 'wheat' in allergens else False),
                "nutritionalInfo": {},
                "contents": new_contents,
                "available": True
            }
            print(result)
            menu_data.append(result)

{'itemId': 'C1', 'category': 'Chicken', 'name': 'Original Recipe', 'price': 3.5, 'soy': True, 'lactose': False, 'gluten': True, 'nutritionalInfo': {'kcal': 400, 'fat': 22, 'protein': 28}, 'contents': [], 'available': False}
{'itemId': 'C2', 'category': 'Chicken', 'name': 'Popcorn Chicken', 'price': 4.0, 'soy': True, 'lactose': False, 'gluten': True, 'nutritionalInfo': {'kcal': 350, 'fat': 20, 'protein': 25}, 'contents': [], 'available': False}
{'itemId': 'C4', 'category': 'Chicken', 'name': 'Hot Wings', 'price': 3.0, 'soy': False, 'lactose': False, 'gluten': True, 'nutritionalInfo': {'kcal': 270, 'fat': 18, 'protein': 19}, 'contents': [], 'available': False}
{'itemId': 'C5', 'category': 'Chicken', 'name': 'Snackbox', 'price': 15.0, 'soy': False, 'lactose': False, 'gluten': False, 'nutritionalInfo': {'kcal': 150, 'fat': 100, 'protein': 10}, 'contents': [], 'available': False}
{'itemId': 'C6', 'category': 'Chicken', 'name': 'Crispy Tenders', 'price': 15.0, 'soy': False, 'lactose': False,

# 2. Upload to DB

In [10]:
import weaviate
import weaviate.classes as wvc
from weaviate.classes.config import DataType, Property, VectorDistances
from dotenv import load_dotenv
from sentence_transformers import SentenceTransformer
load_dotenv()

client = weaviate.connect_to_local()
client.collections.delete("Kfc_menu")
embedding_model = SentenceTransformer("multi-qa-MiniLM-L6-cos-v1")

In [11]:
client.collections.delete("Kfc_menu")
if not client.collections.exists("Kfc_menu"):
    collection = client.collections.create(
            name = "Kfc_menu",
            vector_index_config=wvc.config.Configure.VectorIndex.hnsw(
                distance_metric=VectorDistances.COSINE,
                quantizer = wvc.config.Configure.VectorIndex.Quantizer.pq()
            ),
            properties = [
                Property(
                    name = "itemId",
                    data_type = DataType.TEXT,
                    index_searchable=True,
                ),
                Property(
                    name = "category",
                    data_type = DataType.TEXT,
                    index_searchable=True,
                ),
                Property(
                    name = "name",
                    data_type = DataType.TEXT,
                    index_searchable=True,
                ),
                Property(
                    name = "price",
                    data_type = DataType.NUMBER,
                ),
                Property(
                    name = "nutritionalInfo",
                    data_type = DataType.OBJECT,
                    nested_properties=[
                        Property(
                            name = "kcal",
                            data_type = DataType.NUMBER,
                        ),
                        Property(
                            name = "fat",
                            data_type = DataType.NUMBER,
                            
                        ),
                        Property(
                            name = "protein",
                            data_type = DataType.NUMBER,
                        ),
                    ],
                    # skip_vectorization=True
                ),
                Property(
                    name = "lactose",
                    data_type = DataType.BOOL,
                    # index_searchable=True
                ),
                Property(
                    name = "gluten",
                    data_type = DataType.BOOL,
                    # index_searchable=True
                ),
                Property(
                    name = "soy",
                    data_type = DataType.BOOL,
                ),
                Property(
                    name="contents",
                    data_type=DataType.OBJECT_ARRAY,
                    skip_vectorization=True,
                    nested_properties=[
                        Property(
                            name="itemId",
                            data_type=DataType.TEXT
                        ),
                        Property(
                            name="quantity",
                            data_type=DataType.NUMBER
                        ),
                        Property(
                            name="from",
                            data_type=DataType.TEXT
                        ),
                        Property(
                            name="choose",
                            data_type=DataType.NUMBER
                        ),
                        Property(
                            name="size",
                            data_type=DataType.TEXT
                        )
                    ]
                ),
                Property(
                    name = "available",
                    data_type = DataType.BOOL,
                ),
            ]
        )
    
with collection.batch.dynamic() as batch:
    for item in menu_data:
        allergens_list = []
        if item["gluten"]:
            allergens_list.append('wheat (CONTAINS GLUTEN)')
        if item["lactose"]:
            allergens_list.append('dairy (CONTAINS LACTOSE)')
        if item["soy"]:
            allergens_list.append('soy (CONTAINS SOY)')
        else:
            pass

        string_to_embed = f"allergens:{allergens_list or ''}, name:{item['name'].lower()}, category:{item['category'].lower()}"
        print(string_to_embed)
        vector = embedding_model.encode(string_to_embed)
        batch.add_object(
            properties=item,
            vector = vector
        )

allergens:['wheat (CONTAINS GLUTEN)', 'soy (CONTAINS SOY)'], name:original recipe, category:chicken
allergens:['wheat (CONTAINS GLUTEN)', 'soy (CONTAINS SOY)'], name:popcorn chicken, category:chicken
allergens:['wheat (CONTAINS GLUTEN)'], name:hot wings, category:chicken
allergens:, name:snackbox, category:chicken
allergens:, name:crispy tenders, category:chicken
allergens:, name:original piece, category:chicken
allergens:, name:tender chicken, category:chicken
allergens:['wheat (CONTAINS GLUTEN)'], name:iced tea, category:drinks
allergens:['dairy (CONTAINS LACTOSE)'], name:pepsi, category:drinks
allergens:['wheat (CONTAINS GLUTEN)'], name:7up, category:drinks
allergens:['dairy (CONTAINS LACTOSE)'], name:fanta, category:drinks
allergens:, name:sourcy, category:drinks
allergens:, name:tropicana apple, category:drinks
allergens:, name:guava, category:drinks
allergens:, name:tea, category:drinks
allergens:, name:latte, category:drinks
allergens:, name:espresso, category:drinks
allergens:,

# 3. Experiment with Retriever

In [12]:
import weaviate
import weaviate.classes as wvc
import time
from drive_thru_bot.agent.retriever import Retriever

client = weaviate.connect_to_local()
collection = client.collections.get("Kfc_menu")

retriever = Retriever(client=client, collection_name="kfc_menu")

### Consider the following questions being asked:

- Hi, do you have cola?
- Hi I want to have a Fire Zinger Stacker without sauce and a cola
- Give me a Veggie Tender, medium, with salad
- Give me an orange chocolate milkshake, medium
- Give me the gluten free burger options
- How many calories does the Colonel have?
- Can I get a Whopper?

In [13]:
test_queries = [
    "Hi, do you have cola?",
    "Hi I want to have a Fire Zinger Stacker without sauce and a cola",
    "Give me a Veggie Tender, medium, with salad",
    "Give me an orange chocolate milkshake, medium",
    "Give me gluten free burger options",
    "How many calories does the Colonel have?",
    "Can I get a Whopper?"
]

In [14]:
for query in test_queries:
    print('Query:', query)
    init_time = time.time()
    result = retriever.query(
        query=query,
        search_type="hybrid",
        query_properties=["name", "category^3"],
        fusion_type=wvc.query.HybridFusion.RELATIVE_SCORE,
        alpha = 0.9,
        limit = 10,
        auto_limit = 1)
    
    print('Time taken: ', (time.time() - init_time)*1000, "ms")
    print([res.properties['name'] for res in result.objects], "\n\n")

Query: Hi, do you have cola?
Time taken:  18.758058547973633 ms
['Pepsi'] 


Query: Hi I want to have a Fire Zinger Stacker without sauce and a cola
Time taken:  14.547348022460938 ms
['Fire Zinger Stacker', 'Fire Zinger Stacker meal'] 


Query: Give me a Veggie Tender, medium, with salad
Time taken:  17.27581024169922 ms
['Veggie Tender', '4 Veggie Tender meal', '4 Veggie Tender meal', 'Veggie Tenders'] 


Query: Give me an orange chocolate milkshake, medium
Time taken:  13.79847526550293 ms
['Chocolate Sundae'] 


Query: Give me gluten free burger options
Time taken:  12.88461685180664 ms
['Tower Burger', 'Zinger Burger', 'Filet Burger'] 


Query: How many calories does the Colonel have?
Time taken:  9.139537811279297 ms
['Colonel Burger', 'Colonel Burger Meal', 'Colonel Stacker', 'Colonel Stacker Meal'] 


Query: Can I get a Whopper?
Time taken:  12.122154235839844 ms
['Colonel Burger', 'Colonel Stacker'] 




# 4. Mistral Agent

In [24]:
!huggingface-cli download TheBloke/Mistral-7B-Instruct-v0.2-GGUF mistral-7b-instruct-v0.2.Q3_K_M.gguf --local-dir ./models --local-dir-use-symlinks False

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Consider using `hf_transfer` for faster downloads. This solution comes with some limitations. See https://huggingface.co/docs/huggingface_hub/hf_transfer for more details.
downloading https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q3_K_M.gguf to /home/raihan/.cache/huggingface/hub/tmpbcxj82mz
mistral-7b-instruct-v0.2.Q3_K_M.gguf: 100%|█| 3.52G/3.52G [02:14<00:00, 26.1MB/s
./models/mistral-7b-instruct-v0.2.Q3_K_M.gguf


In [1]:
from langchain.callbacks.manager import CallbackManager  
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
# from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.llms.llamacpp import LlamaCpp
# from langchain_community.chat_models import ChatOllama


model = LlamaCpp(
    model_path="./models/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
    temperature=0.3,
    max_tokens=512,
    n_threads = 8
)

llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from ./models/mistral-7b-instruct-v0.1.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 l

In [25]:
import ast
import re
import weaviate
import weaviate.classes as wvc
from drive_thru_bot.retriever import Retriever
order = []
chat_history = []
client = weaviate.connect_to_local()
retriever = Retriever(client=client, collection_name="kfc_menu")

async def router(query: str, chat_history: list, order: list):
    ROUTER_PROMPT = f"""
    <s>[INST] You are Drive Thru Employee at KFC. You keep answers very brief and you are very welcoming. 
    Given the query, chat history, and order, you must decide whether to take the order, chat with the customer, or checkout the order.
    You have the following actions:
    - Take Order
    - Checkout Order
    
    Always and ONLY return a JSON object containing "action" as the key and the action as the value.
    For example :
    If customer said "I want a zinger burger meal with no sauce, veggie burger meal with no cheese, and a cola", 
    you should return: {{"action": "Take Order"}}
    
    If customer said "I want to checkout" or if you asked the customer if they want anything else and they say "No",
    you should return: {{"action": "Checkout Order"}}
    
    Chat History:
    {chat_history}
    
    Order:
    {order}
    
    Query:
    {query}
    [/INST]
    """
    chat_history.append({
        "role": "user",
        "content": query
    })
    output = await model.ainvoke(ROUTER_PROMPT)
    return ast.literal_eval(output.strip())


async def take_order(query: str):
    prompt = f"""<s>[INST] You are Drive Thru Employee at KFC. You keep answers very brief and you are very welcoming. 
You are able to take orders, revise orders, and ask for clarification. Once the customer is satisfied, you tell them to go to the next window. 
When the customer lists their order, list individual items their quantities, and additional notes as a list of dictionaries. 

For example: 
Customer says "I want 3 Zinger Burger without sauce and a cola", you answer in a form of a list of dictionaries:
[
    {{"name": "Zinger Burger", "quantity": 3, "notes": "without sauce"}},
    {{"name": "Cola", "quantity": 1}}
].

If customer says "I want a zinger burger meal with no sauce, veggie burger meal with no cheese, and a cola":
[
    {{ "name": "Zinger Burger", "quantity": 1, "meal": True, "notes": "no sauce"}},
    {{ "name": "Veggie Burger", "quantity": 1, "meal": True, "notes": "no cheese"}},
    {{ "name": "Cola", "quantity": 1}}
].

DO NOT FORGET THE MEAL KEY. IF THE CUSTOMER ORDERS A MEAL, YOU MUST INCLUDE THE MEAL KEY.
YOU MUST ONLY PRINT THE LIST OF DICTIONARIES. NOTHING ELSE.
[/INST]
[INST]{query}[/INST]
    """
    chat_history.append({
        "role": "customer",
        "content": query
    })
    output = await model.ainvoke(prompt)
    
    
    # Convert string to list of dict
    list_of_items = ast.literal_eval(output.strip())
    results = []
    for item in list_of_items:
        name = item['Name']
        meal_bool = item.get('Meal', False)
        if meal_bool:
            filter = wvc.query.Filter.by_property("category").equal("Menus")
        else:
            filter = None
            
        res = retriever.query(
            query=name,
            search_type="hybrid",
            query_properties=["name"],
            filters = filter,
            fusion_type=wvc.query.HybridFusion.RELATIVE_SCORE,
            alpha = 0.1,
            limit = 10,
            auto_limit = 1).objects[0].properties
        
        
        info = {key: res.get(key, None) for key in res.keys() if key in ['name', 'price', 'available', 'contents']}
        info['quantity'] = item['quantity']
        info['notes'] = item.get('Notes', "None")
        
        order.append(info)
        
    total_price = sum([item['price'] * item['quantity'] for item in results])
    order_summary = """Order:\n{}\n\nTotal Price: {} AED\n\nWould you like anything else?
          """.format(
              '\n'.join(["- " + f"{item['name']} " + f"{item['quantity']} x {item['price']} AED, Notes: {item.get("Notes", None)}" for item in results]),
                total_price
          )
          
    chat_history.append({"role": "assistant", "content": order_summary})
    print(order_summary)
    
    # return order, chat_history
    # order_confirm_prompt = f"""<s>[INST] You are Drive Thru Employee at KFC. You keep answers very brief and you are very welcoming.
    
    # GIVEN THE ORDER:
    # {results}
    
    # AND THE TOTAL PRICE: {total_price}
    
    # DO NOT GREET THE CUSTOMER AGAIN. JUST CONFIRM THE ORDER.
    # Repeat the order to the customer in a list format:
    #  - Item name, price x quantity, and any additional notes.
    # Then repeat the total price.
    # Then ask if the customer wants to add anything else. 
    # [/INST]
    # """
    
    # await model.ainvoke(order_confirm_prompt)
        

  retriever = Retriever(client=client, collection_name="kfc_menu")


In [27]:
await router("Hi, I want to have a Fire Zinger Stacker without sauce and a cola", chat_history, order)


    

Llama.generate: prefix-match hit



    {"action": "Take Order"}


llama_print_timings:        load time =     871.22 ms
llama_print_timings:      sample time =       1.94 ms /    10 runs   (    0.19 ms per token,  5157.30 tokens per second)
llama_print_timings: prompt eval time =    7988.88 ms /    74 tokens (  107.96 ms per token,     9.26 tokens per second)
llama_print_timings:        eval time =    2768.83 ms /     9 runs   (  307.65 ms per token,     3.25 tokens per second)
llama_print_timings:       total time =   10811.98 ms /    83 tokens


{'action': 'Take Order'}

In [16]:
query = "Can I have 2 Veggie Burger, 1 Zinger Burger, and 1 Cola? Also I don't any cheese in the Veggie Burger"
await take_order(query)

Llama.generate: prefix-match hit



    [
        {"Name": "Veggie Burger", "quantity": 2, "Notes": "no cheese"},
        {"Name": "Zinger Burger", "quantity": 1},
        {"Name": "Cola", "quantity": 1}
    ]


llama_print_timings:        load time =     871.22 ms
llama_print_timings:      sample time =      13.43 ms /    68 runs   (    0.20 ms per token,  5062.16 tokens per second)
llama_print_timings: prompt eval time =    4052.40 ms /    37 tokens (  109.52 ms per token,     9.13 tokens per second)
llama_print_timings:        eval time =   17929.69 ms /    67 runs   (  267.61 ms per token,     3.74 tokens per second)
llama_print_timings:       total time =   22206.49 ms /   104 tokens


Order:
- Veggie Burger 2 x 10.0 AED
- Zinger Burger 1 x 15.0 AED
- Pepsi 1 x 2.8 AED

Total Price: 37.8 AED

Would you like anything else?
          


In [2]:
from langchain.chat_models import ChatOllama

model = ChatOllama(
    model="openhermes:7b-mistral-v2.5-q4_K_M"
)


In [10]:
async for chunk in model.astream("Hi"):
    print(chunk.content)

Hello
!
 How
 can
 I
 help
 you
 today
?
 If
 you
 have
 any
 questions
 or
 need
 assistance
,
 feel
 free
 to
 ask
.



In [34]:
from langchain_core.tools import tool

@tool
async def take_order(query: str):
    """
    Takes the order from the customer and returns the order summary and the total price.
    """
    prompt = f"""<s>[INST] You are Drive Thru Employee at KFC. You keep answers very brief and you are very welcoming. 
You are able to take orders, revise orders, and ask for clarification. Once the customer is satisfied, you tell them to go to the next window. 
When the customer lists their order, list individual items their quantities, and additional notes as a list of dictionaries. 

For example: 
Customer says "I want 3 Zinger Burger without sauce and a cola", you answer in a form of a list of dictionaries:
[
    {{"Name": "Zinger Burger", "quantity": 3, "Notes": "without sauce"}},
    {{"Name": "Cola", "quantity": 1}}
].

If customer says "I want a zinger burger meal with no sauce, veggie burger meal with no cheese, and a cola":
[
    {{ "Name": "Zinger Burger", "quantity": 1, "Meal": True, "Notes": "no sauce"}},
    {{ "Name": "Veggie Burger", "quantity": 1, "Meal": True, "Notes": "no cheese"}},
    {{ "Name": "Cola", "quantity": 1}}
].

DO NOT FORGET THE MEAL KEY. IF THE CUSTOMER ORDERS A MEAL, YOU MUST INCLUDE THE MEAL KEY.
YOU MUST ONLY PRINT THE LIST OF DICTIONARIES. NOTHING ELSE.
[/INST]
[INST]{query}[/INST]
    """
    # chat_history.append({
    #     "role": "customer",
    #     "content": query
    # })
    output = await model.ainvoke(prompt)
    
    
    # Convert string to list of dict
    list_of_items = ast.literal_eval(output.strip())
    results = []
    for item in list_of_items:
        name = item['Name']
        meal_bool = item.get('Meal', False)
        if meal_bool:
            filter = wvc.query.Filter.by_property("category").equal("Menus")
        else:
            filter = None
            
        res = retriever.query(
            query=name,
            search_type="hybrid",
            query_properties=["name"],
            filters = filter,
            fusion_type=wvc.query.HybridFusion.RELATIVE_SCORE,
            alpha = 0.1,
            limit = 10,
            auto_limit = 1).objects[0].properties
        
        
        info = {key: res.get(key, None) for key in res.keys() if key in ['name', 'price', 'available', 'contents']}
        info['quantity'] = item['quantity']
        
        order.append(info)
    total_price = sum([item['price'] * item['quantity'] for item in results])
    
    order_summary = """Order:\n{}\n\nTotal Price: {} AED
          """.format(
              '\n'.join(["- " + f"{item['name']} " + f"{item['quantity']} x {item['price']} AED" for item in results]),
                total_price
          )
          
    # chat_history.append({"role": "assistant", "content": order_summary})
    return order_summary

In [35]:
from langchain.agents.initialize import initialize_agent
from langchain.memory import ConversationBufferWindowMemory
memory = ConversationBufferWindowMemory(
        memory_key="chat_history",
        input_key="input",
        output_key="output",
        return_messages=True,
        k=5,
)

agent = initialize_agent(
    tools = [take_order],
    agent_name="KFC Drive Thru",
    llm = model,
    agent="chat-conversational-react-description",
    verbose=True,
    memory = memory,
)
    




In [57]:
print(agent.agent.llm_chain.prompt.messages[0].prompt.template)

Assistant is a large language model trained by OpenAI.

Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.

Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.

Overall, Assistant is a powerful system that can help with a wide range of task

In [None]:


SYSTEM_PROMPT = """
<s>[INST] You are Drive Thru Employee at KFC. You keep answers very brief and you are very welcoming. 
Your job is to take orders, revise orders, and ask for clarification. Once the customer is satisfied, you tell them to go to the next window.
You have access to the following tools:
- take_order(query: str) -> str. Takes the order from the customer and returns the order summary and the total price.
- checkout_order(order: list) -> str. Checks out the order and returns the order summary and the total price.
"""



In [None]:
CREATE_ORDER_ENDPOINT = "https://api.ncr.com/order/3/orders/1"