# Set the OpenAI API key as an environment variable

In [11]:
import os
import openai
import nest_asyncio

# Never keep the secret in the notebook: read it from the environment and fall
# back to an interactive, non-echoing prompt when the variable is not set.
from getpass import getpass

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

# nest_asyncio patches asyncio so nested event loops can run inside the
# notebook's already-running loop.
nest_asyncio.apply()

In [13]:
# Reuse the OPENAI_API_KEY constant instead of repeating the secret literal, so
# there is a single place where the key is defined.
# The key is exported through the environment and also set on the `openai`
# module for code that reads the module-level setting.
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
openai.api_key = os.environ["OPENAI_API_KEY"]

# Load Data

In [14]:
from pathlib import Path
from llama_index import download_loader
from llama_index import VectorStoreIndex, ServiceContext, StorageContext
from llama_index.llms import OpenAI

# Fetch the JSONReader loader class and use it to turn the menu file into
# Document objects.
JSONReader = download_loader("JSONReader")

loader = JSONReader()
menu_path = Path('data.txt')
documents = loader.load_data(menu_path)

In [4]:
# Inspect the Document objects produced by the loader
documents

[Document(id_='6fdde138-228c-4de1-b319-089612c31903', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='11fd8c0e11e7a39ad102dc20bbd0b3f30be3ddf7d9793382cdc8b3556d56fe49', text='{\n"breakfast": [\n{\n"name": "Classic Eggs Benedict",\n"description": "Poached eggs on toasted English muffins with Canadian bacon and hollandaise sauce.",\n"price": 12.99,\n"ingredients": [\n"Eggs",\n"English muffin",\n"Canadian bacon",\n"Hollandaise sauce"\n],\n"options": [\n"Gluten-free muffin",\n"Turkey bacon"\n],\n"availability": "7am - 11am"\n{\n"name": "Blueberry Pancakes",\n"description": "Fluffy pancakes filled with fresh blueberries, served with maple syrup and whipped cream.",\n"price": 10.5,\n"ingredients": [\n"Flour",\n"Blueberries",\n"Eggs",\n"Milk",\n"Maple syrup",\n"Whipped cream"\n],\n"options": [\n"Add banana",\n"Chocolate chips"\n],\n"availability": "7am - 11am"\n}\n],\n"coffee": [\n{\n"name": "Espresso",\n"description": "Rich

# Setting up Vector Indices

When documents are ingested into an index, they are split into chunks with a certain amount of overlap. The default chunk size is 1024, while the default chunk overlap is 20.

Changing either of these parameters will change the embeddings that are calculated. A smaller chunk size means the embeddings are more precise, while a larger chunk size means that the embeddings may be more general, but can miss fine-grained details.

Furthermore, when changing the chunk size for a vector index, you may also want to increase the similarity_top_k parameter to better represent the amount of data to retrieve for each query.

Here, we halve the default chunk size (1024 → 512) and raise the chunk overlap from 20 to 50. To compensate for the smaller chunks, we also double similarity_top_k from its default of 2 to 4.

In [32]:
# Build the LLM and the ServiceContext used by the indexing and querying stages
# of the RAG pipeline. ServiceContext lets us adjust settings such as the LLM,
# the embedding model and the chunking parameters.
# The LlamaIndex OpenAI wrapper takes the key as `api_key`; `openai_api_key`
# is not one of its parameters, so the key passed under that name was ignored.
llm = OpenAI(model="gpt-4", api_key=OPENAI_API_KEY)

service_context = ServiceContext.from_defaults(
    llm=llm,
    chunk_size=512,
    chunk_overlap=50,
)

# Chunk the documents and build a queryable vector index from the chunks.
index = VectorStoreIndex.from_documents(
    documents, service_context=service_context
)

# Expose the index as a query engine that retrieves the 4 most similar chunks.
query_engine = index.as_query_engine(similarity_top_k=4)

In [18]:
# Test the query engine with a sample question
response = query_engine.query("What are the available breakfast options?")

In [24]:
# Show only the answer text of the response object
response.response

'The available breakfast options are Classic Eggs Benedict and Blueberry Pancakes. The Classic Eggs Benedict is made with poached eggs on toasted English muffins with Canadian bacon and hollandaise sauce, and it costs $12.99. You can also choose to have a gluten-free muffin or turkey bacon. The Blueberry Pancakes are fluffy pancakes filled with fresh blueberries, served with maple syrup and whipped cream, and they cost $10.5. You can add banana or chocolate chips to the pancakes. Both breakfast options are available from 7am to 11am.'

# Setting up the Chatbot Agent

In [25]:
from llama_index.tools import QueryEngineTool
from llama_index.tools import QueryEngineTool, ToolMetadata

In [34]:
from llama_index.agent import OpenAIAgent

# Wrap the query engine as a tool; the name and description are what the agent
# sees when deciding whether to call it.
menu_tool_metadata = ToolMetadata(
    name="question_query_engine",
    description="useful for when you want to answer queries that require analyzing restaurent menu data",
)
query_engine_tool = QueryEngineTool(
    query_engine=query_engine,
    metadata=menu_tool_metadata,
)
tools = [query_engine_tool]

# verbose=True makes the agent print each function call and its output.
agent = OpenAIAgent.from_tools(tools, verbose=True)

In [35]:
# Ask the agent the same question that was sent to the query engine directly
response = agent.chat("What are the available breakfast options?")
print(response)

Added user message to memory: What are the available breakfast options?
=== Calling Function ===
Calling function: question_query_engine with args: {
  "input": "What are the available breakfast options?"
}
Got output: The available breakfast options are "Classic Eggs Benedict" and "Blueberry Pancakes". The Classic Eggs Benedict is made with poached eggs on toasted English muffins with Canadian bacon and hollandaise sauce, and it costs $12.99. You can also choose to have a gluten-free muffin or turkey bacon. The Blueberry Pancakes are fluffy pancakes filled with fresh blueberries, served with maple syrup and whipped cream, and they cost $10.5. You can add banana or chocolate chips to the pancakes. Both breakfast options are available from 7am to 11am.

The available breakfast options are "Classic Eggs Benedict" and "Blueberry Pancakes". The Classic Eggs Benedict is made with poached eggs on toasted English muffins with Canadian bacon and hollandaise sauce, and it costs $12.99. You can 

# Setting up the Chatbot Loop

In [31]:
agent = OpenAIAgent.from_tools(tools)  # verbose=False by default

# Minimal chat loop: read a line, forward it to the agent, print the reply.
# Type "exit" (any case, surrounding whitespace ignored) to stop.
while True:
    try:
        text_input = input("User: ").strip()
    except (EOFError, KeyboardInterrupt):
        # End the chat cleanly instead of surfacing a traceback.
        break
    if not text_input:
        # Do not spend an API call on a blank line.
        continue
    if text_input.lower() == "exit":
        break
    response = agent.chat(text_input)
    print(f"Agent: {response}")

User: What are the available breakfast options?
Agent: The available breakfast options are:
1. Classic Eggs Benedict - Poached eggs on toasted English muffins with Canadian bacon and hollandaise sauce. It costs $12.99 and you can choose to have a gluten-free muffin or turkey bacon as options.
2. Blueberry Pancakes - Fluffy pancakes filled with fresh blueberries, served with maple syrup and whipped cream. It costs $10.5 and you can add banana or chocolate chips as options.

Both breakfast options are available from 7am to 11am.
User: what is the price for Iced tea?
Agent: The price for Iced Tea is $3.5.
User: exit


# Using .json File

In [53]:
from pathlib import Path
from llama_index import download_loader
from llama_index import VectorStoreIndex, ServiceContext, StorageContext
from llama_index.llms import OpenAI

# Download the JSONReader loader and parse the .json version of the menu
JSONReader = download_loader("JSONReader")

loader = JSONReader()
json_menu_path = Path('data.json')
documents = loader.load_data(json_menu_path)

In [54]:
# The LlamaIndex OpenAI wrapper takes the key as `api_key`; `openai_api_key`
# is not one of its parameters, so the key passed under that name was ignored.
llm = OpenAI(model="gpt-4", api_key=OPENAI_API_KEY)

# Initialise the service context with the chunking parameters and the LLM.
service_context = ServiceContext.from_defaults(
    llm=llm,
    chunk_size=512,
    chunk_overlap=50,
)

# Create an index from the documents: the chunks are embedded and the
# resulting vectors are kept in a vector store so they can be queried.
index = VectorStoreIndex.from_documents(
    documents, service_context=service_context
)

# Expose the index as a query engine that retrieves the 4 most similar chunks.
json_query_engine = index.as_query_engine(similarity_top_k=4)

In [57]:
# Query the index built from data.json and print the answer text
response = json_query_engine.query("What are the available breakfast options?")
print(response)

The available breakfast options are "Classic Eggs Benedict" and "Blueberry Pancakes". The Classic Eggs Benedict is made with poached eggs on toasted English muffins with Canadian bacon and hollandaise sauce, and it costs $12.99. You can also choose to have a gluten-free muffin or turkey bacon. The Blueberry Pancakes are fluffy pancakes filled with fresh blueberries, served with maple syrup and whipped cream, and they cost $10.5. You can add banana or chocolate chips to the pancakes. Both breakfast options are available from 7am to 11am.


# Using JSON QUERY ENGINE

In [1]:
# Install jsonpath-ng if it is missing (uncomment to run):
# %pip install jsonpath-ng

Notes:

- JSON Schema: a standardised way to describe the structure of JSON data
- We can use ChatGPT to create a JSON schema for the JSON data
- The JSON value, the JSON schema and the user question are the inputs to a JSONQueryEngine
- JSONQueryEngine will generate an ExecuteJSONPathQuery
- We can extract the response from the ExecuteJSONPathQuery

In [58]:
# Create a JSON schema for the JSON file - we can use ChatGPT to create the schema for us

In [23]:
# import libraries
import json
from llama_index.indices.service_context import ServiceContext
from llama_index.llms import OpenAI
from llama_index.indices.struct_store import JSONQueryEngine
from IPython.display import Markdown, display

In [5]:
# Load the JSON data and the JSON schema that describes it.
# JSON text is UTF-8 by specification, so decode explicitly instead of relying
# on the platform's default encoding.
with open('data.json', encoding='utf-8') as f:
    # json.load returns the parsed JSON document (a dictionary here)
    data = json.load(f)

with open('schema.json', encoding='utf-8') as f:
    json_schema = json.load(f)

In [14]:
# Create the LLM and a service context for it.
# The LlamaIndex OpenAI wrapper takes the key as `api_key`; `openai_api_key`
# is not one of its parameters, so the key passed under that name was ignored.
llm = OpenAI(model="gpt-4", api_key=OPENAI_API_KEY)
service_context = ServiceContext.from_defaults(llm=llm)

# Query engine that synthesises a natural-language answer.
nl_query_engine = JSONQueryEngine(
    json_value=data,
    json_schema=json_schema,
    service_context=service_context,
)

# Query engine that skips synthesis and returns the raw JSON result.
raw_query_engine = JSONQueryEngine(
    json_value=data,
    json_schema=json_schema,
    service_context=service_context,
    synthesize_response=False,
)

In [15]:
# Send a question to the natural-language query engine
nl_response = nl_query_engine.query("What are the breakfast options available?")

In [46]:
# Inspect the full Response object, including its metadata
nl_response

Response(response='The breakfast item available is the Classic Eggs Benedict. It is described as poached eggs on toasted English muffins with Canadian bacon and hollandaise sauce. The price is $12.99. The ingredients include eggs, English muffin, Canadian bacon, and hollandaise sauce. There are options for a gluten-free muffin and turkey bacon. It is available from 7am to 11am.', source_nodes=[], metadata={'json_path_response_str': '$.breakfast[*]'})

In [45]:
# Render the natural-language answer as Markdown
nl_markdown = Markdown(str(nl_response))
display(nl_markdown)

The breakfast item available is the Classic Eggs Benedict. It is described as poached eggs on toasted English muffins with Canadian bacon and hollandaise sauce. The price is $12.99. The ingredients include eggs, English muffin, Canadian bacon, and hollandaise sauce. There are options for a gluten-free muffin and turkey bacon. It is available from 7am to 11am.

In [20]:
# Send the same question to raw_query_engine, which returns the raw JSON result
raw_response = raw_query_engine.query("What are the breakfast options available?")

In [21]:
# Inspect the full Response object returned by the raw engine
raw_response

Response(response='{"name": "Classic Eggs Benedict"}', source_nodes=[], metadata={'json_path_response_str': '$.breakfast[*].name'})

In [48]:
# Render the raw JSON answer as Markdown
raw_markdown = Markdown(str(raw_response))
display(raw_markdown)

{"name": "Classic Eggs Benedict"}

In [52]:
# Show the answers from the two query engines one after the other.
# The stray "<" that trailed the raw response (and leaked into the rendered
# output) has been removed.
display(Markdown(f"<h4>Natural language Response</h4><br>{nl_response}"))
display(Markdown(f"<h4>Raw JSON Response</h4><br>{raw_response}"))

<h4>Natural language Response</h4><br>The breakfast item available is the Classic Eggs Benedict. It is described as poached eggs on toasted English muffins with Canadian bacon and hollandaise sauce. The price is $12.99. The ingredients include eggs, English muffin, Canadian bacon, and hollandaise sauce. There are options for a gluten-free muffin and turkey bacon. It is available from 7am to 11am.

<h4>Raw JSON Response</h4><br>{"name": "Classic Eggs Benedict"}<

In [42]:
# Ask about a different section of the menu
nl_query_engine.query("What are the drinks items you have?")

Response(response='We have a drink item called Iced Tea. It is a refreshing iced tea brewed with a hint of lemon. It costs $3.5. The ingredients are Tea and Lemon. You can choose between sweetened and unsweetened options. It is available all day.', source_nodes=[], metadata={'json_path_response_str': '$.drinks[*]'})

In [43]:
# `response_text` is not assigned in any visible cell, so this cell raises
# NameError on a fresh kernel; take the text from the natural-language response.
response_text = nl_response.response
print("Raw NL Response:", response_text)

Raw NL Response: The breakfast item available is the Classic Eggs Benedict. It is described as poached eggs on toasted English muffins with Canadian bacon and hollandaise sauce. The price is $12.99. The ingredients include eggs, English muffin, Canadian bacon, and hollandaise sauce. There are options for a gluten-free muffin and turkey bacon. It is available from 7am to 11am.


In [31]:
# Ask a yes/no question about the ingredients of a single item
nl_query_engine.query("Is breakfast item Classic Eggs Benedict contains eggs?")

Response(response='Yes, the breakfast item Classic Eggs Benedict contains eggs.', source_nodes=[], metadata={'json_path_response_str': "$.breakfast[?(@.name=='Classic Eggs Benedict')].ingredients[*]"})

In [33]:
# Ask for every item in the breakfast section
nl_query_engine.query("List all breakfast items.")

Response(response='The breakfast item available is the Classic Eggs Benedict. It is described as poached eggs on toasted English muffins with Canadian bacon and hollandaise sauce. The price is $12.99. The ingredients include eggs, English muffin, Canadian bacon, and hollandaise sauce. There are options for a gluten-free muffin and turkey bacon. It is available from 7am to 11am.', source_nodes=[], metadata={'json_path_response_str': '$.breakfast[*]'})