In [45]:
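# Packages needed for this notebook (uncomment the lines below to install them):
#!pip install llama-index
#!pip install llama-index-core
#!pip install llama-index-embeddings-openai
#!pip install llama-index-postprocessor-flag-embedding-reranker
#!pip install git+https://github.com/FlagOpen/FlagEmbedding.git
#!pip install llama-parse
#!pip install python-dotenv  # provides the `dotenv` module imported below
#!pip install nest_asyncio   # imported below to run llama-parse inside the notebook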

In [1]:
# llama-parse is async-first, so running its async code in a notebook requires nest_asyncio.
# The patch is applied here, up front, before LlamaParse is used further down.
import nest_asyncio
nest_asyncio.apply()

In [44]:
# API keys are loaded from a .env file located in the notebook directory, one KEY=value per line:
# LLAMA_CLOUD_API_KEY=llx-your-llama-cloud-api-key   (create one at https://cloud.llamaindex.ai/api-key)
# OPENAI_API_KEY=sk-your-openai-key                  (create one at https://platform.openai.com/api-keys)

In [2]:
# Load the API keys from the environment (populated from a local .env file, if present).
import os
import warnings

from dotenv import load_dotenv

load_dotenv()

openai_api_key = os.getenv('OPENAI_API_KEY')
llama_cloud_api_key = os.getenv('LLAMA_CLOUD_API_KEY')

# Nothing below passes these keys explicitly, so a missing or empty value would only
# surface later as an opaque authentication failure. Warn now, naming the variable(s),
# without ever echoing a key value and without aborting the notebook.
_missing_keys = [
    name
    for name, value in (
        ('OPENAI_API_KEY', openai_api_key),
        ('LLAMA_CLOUD_API_KEY', llama_cloud_api_key),
    )
    if not value
]
if _missing_keys:
    warnings.warn(
        "Missing environment variable(s): " + ", ".join(_missing_keys)
        + ". Add them to the .env file in the notebook directory."
    )

In [13]:
from llama_index.core import Settings, VectorStoreIndex
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI

# Models used by this notebook: one chat LLM and one embedding model.
llm = OpenAI(model="gpt-3.5-turbo-0125")
embed_model = OpenAIEmbedding(model="text-embedding-3-small")

# Register both on the global LlamaIndex Settings object.
Settings.embed_model = embed_model
Settings.llm = llm

In [4]:
from pathlib import Path

from llama_parse import LlamaParse

# Relative directory that holds the source catalog PDF.
data_dir = Path('catalog')

# Full path to the catalog PDF inside that directory.
file_path = data_dir.joinpath('laser-measurement-and-control-cat.pdf')

# Parse the PDF with LlamaParse, requesting markdown as the result type.
documents = LlamaParse(result_type="markdown").load_data(file_path)

Started parsing the file under job_id 610d0ae0-f54b-4587-8a45-6fb7abe91c6f


In [52]:
# Inspect the type of the first object returned by LlamaParse.load_data.
type(documents[0])

llama_index.core.schema.Document

In [None]:
# Preview a 1,000-character slice (offsets 10000-11000) of the first document's parsed text.
print(documents[0].text[10000:11000] + '...')

In [7]:
# Number of documents returned by the parser.
# The previous expression summed len(documents) once per document, i.e. it evaluated to
# len(documents) ** 2, which only equals the document count for zero or one document.
len(documents)

1

In [8]:
# Size of the parsed text of the first document, in characters.
num_characters = len(documents[0].text)
print(f"Number of characters in documents[0]: {num_characters}")

Number of characters in documents[0]: 282789


In [15]:
from llama_index.core.node_parser import MarkdownElementNodeParser

In [16]:
# Splits a markdown document into Text Nodes and Index Nodes corresponding to embedded objects (e.g. tables).
# Reuse the `llm` instance configured next to Settings.llm instead of constructing a second,
# separately hard-coded OpenAI client, so the model choice lives in exactly one place.
node_parser = MarkdownElementNodeParser(llm=llm)

In [17]:
# Run the markdown element parser over the parsed documents to produce the node list.
# NOTE(review): the recorded progress bar shows 223 items taking ~1m50s, so this cell is slow to re-run.
nodes = node_parser.get_nodes_from_documents(documents)

Embeddings have been explicitly disabled. Using MockEmbedding.


223it [00:00, 79251.80it/s]
100%|█████████████████████████████████████████████████████████████████████████████████| 223/223 [01:50<00:00,  2.01it/s]


In [32]:
# Total number of nodes produced by the node parser.
print(len(nodes))

629


In [None]:
# Spot check: show the first 500 characters of one node (index 300 is an arbitrary pick).
print(nodes[300].text[:500] + '...')

In [19]:
# Split the parsed nodes into base nodes and objects using the node parser's own helper;
# both lists are combined again when the recursive index is built.
base_nodes, objects = node_parser.get_nodes_and_objects(nodes)

In [20]:
# Two indexes over the same catalog, for side-by-side comparison:
# - recursive_index: built from the element parser's base nodes plus objects
# - raw_index: built directly from the parsed documents
recursive_index = VectorStoreIndex(nodes=base_nodes+objects)
raw_index = VectorStoreIndex.from_documents(documents)

In [55]:
import os.path
from llama_index.core import (
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)

# On-disk locations for the two persisted indexes.
persist_dir_recursive = "./storage/adv_cat/recursive"
persist_dir_raw = "./storage/adv_cat/raw"

# Write each index's storage context to its own directory so it can be reloaded later.
for _index, _persist_dir in (
    (recursive_index, persist_dir_recursive),
    (raw_index, persist_dir_raw),
):
    _index.storage_context.persist(persist_dir=_persist_dir)

In [56]:
# Reload both indexes from their persisted storage directories.

# Recursive index (element-parser nodes and objects).
storage_context_recursive = StorageContext.from_defaults(persist_dir=persist_dir_recursive)
recursive_index = load_index_from_storage(storage_context_recursive)

# Raw index (built directly from the documents).
storage_context_raw = StorageContext.from_defaults(persist_dir=persist_dir_raw)
raw_index = load_index_from_storage(storage_context_raw)

In [57]:
from llama_index.postprocessor.flag_embedding_reranker import FlagEmbeddingReranker

# Reranker shared by both engines: keeps the top 5 of the retrieved candidates.
reranker = FlagEmbeddingReranker(model="BAAI/bge-reranker-large", top_n=5)

# Baseline engine over the raw index: retrieve 15 candidates, then rerank.
raw_query_engine = raw_index.as_query_engine(
    similarity_top_k=15,
    node_postprocessors=[reranker],
)

# Engine over the recursive index with the same retrieval settings, plus verbose logging.
recursive_query_engine = recursive_index.as_query_engine(
    similarity_top_k=15,
    node_postprocessors=[reranker],
    verbose=True,
)

In [58]:
# Ask both engines the same question and print the answers one after the other.
query = "What power range can a PM10 measure?"

# Baseline engine (raw index).
response_1 = raw_query_engine.query(query)
print("\n***********New LlamaParse+ Basic Query Engine***********", response_1, sep="\n")

# Recursive engine (element-parser nodes and objects).
response_2 = recursive_query_engine.query(query)
print("\n***********New LlamaParse+ Recursive Retriever Query Engine***********", response_2, sep="\n")


***********New LlamaParse+ Basic Query Engine***********
What power range can a PM10 measure?
Answer: 5 mW to 10 W
[1;3;38;2;11;159;203mRetrieval entering id_c6bdc0b3-387b-4b48-848a-8e08dc3e64cb_453_table: TextNode
[0m[1;3;38;2;237;90;200mRetrieving from object TextNode with query What power range can a PM10 measure?
[0m[1;3;38;2;11;159;203mRetrieval entering id_c6bdc0b3-387b-4b48-848a-8e08dc3e64cb_447_table: TextNode
[0m[1;3;38;2;237;90;200mRetrieving from object TextNode with query What power range can a PM10 measure?
[0m[1;3;38;2;11;159;203mRetrieval entering id_c6bdc0b3-387b-4b48-848a-8e08dc3e64cb_479_table: TextNode
[0m[1;3;38;2;237;90;200mRetrieving from object TextNode with query What power range can a PM10 measure?
[0m[1;3;38;2;11;159;203mRetrieval entering id_c6bdc0b3-387b-4b48-848a-8e08dc3e64cb_437_table: TextNode
[0m[1;3;38;2;237;90;200mRetrieving from object TextNode with query What power range can a PM10 measure?
[0m
***********New LlamaParse+ Recursive Re

In [59]:
# Ask both engines the same question and print the answers one after the other.
query = "What different cable types are available for a PM10 sensor?"

# Baseline engine (raw index).
response_1 = raw_query_engine.query(query)
print("\n***********New LlamaParse+ Basic Query Engine***********", response_1, sep="\n")

# Recursive engine (element-parser nodes and objects).
response_2 = recursive_query_engine.query(query)
print("\n***********New LlamaParse+ Recursive Retriever Query Engine***********", response_2, sep="\n")


***********New LlamaParse+ Basic Query Engine***********
The different cable types available for a PM10 sensor are PM DB-25 and Power.
[1;3;38;2;11;159;203mRetrieval entering id_c6bdc0b3-387b-4b48-848a-8e08dc3e64cb_447_table: TextNode
[0m[1;3;38;2;237;90;200mRetrieving from object TextNode with query What different cable types are available for a PM10 sensor?
[0m[1;3;38;2;11;159;203mRetrieval entering id_c6bdc0b3-387b-4b48-848a-8e08dc3e64cb_581_table: TextNode
[0m[1;3;38;2;237;90;200mRetrieving from object TextNode with query What different cable types are available for a PM10 sensor?
[0m[1;3;38;2;11;159;203mRetrieval entering id_c6bdc0b3-387b-4b48-848a-8e08dc3e64cb_389_table: TextNode
[0m[1;3;38;2;237;90;200mRetrieving from object TextNode with query What different cable types are available for a PM10 sensor?
[0m[1;3;38;2;11;159;203mRetrieval entering id_c6bdc0b3-387b-4b48-848a-8e08dc3e64cb_445_table: TextNode
[0m[1;3;38;2;237;90;200mRetrieving from object TextNode wit

In [25]:
# Ask both engines the same question and print the answers one after the other.
query = "What are the best three choices of power sensors to measure a 5W 1064nm laser with a 5mm beam diameter?"

# Baseline engine (raw index).
response_1 = raw_query_engine.query(query)
print("\n***********New LlamaParse+ Basic Query Engine***********", response_1, sep="\n")

# Recursive engine (element-parser nodes and objects).
response_2 = recursive_query_engine.query(query)
print("\n***********New LlamaParse+ Recursive Retriever Query Engine***********", response_2, sep="\n")


***********New LlamaParse+ Basic Query Engine***********
LM-3, LM-10, and LM-45 would be the best three choices of power sensors to measure a 5W 1064nm laser with a 5mm beam diameter.

***********New LlamaParse+ Recursive Retriever Query Engine***********
PowerMax - Laser Power Sensors with a power range of 5 µW to 140 mW, featuring a large 8 mm and 10 mm apertures and high-sensitivity Silicon photodiode would be suitable for measuring a 5W 1064nm laser with a 5mm beam diameter. The PowerMax-USB UV/VIS Quantum sensors, which incorporate a Silicon photodiode, would also be a good choice. Additionally, the PowerMax-Pro sensor with a power range of 100 mW to 150 W could be considered for this measurement.


In [26]:
# Ask both engines the same question and print the answers one after the other.
query = "What are the specs for a PM10?"

# Baseline engine (raw index).
response_1 = raw_query_engine.query(query)
print("\n***********New LlamaParse+ Basic Query Engine***********", response_1, sep="\n")

# Recursive engine (element-parser nodes and objects).
response_2 = recursive_query_engine.query(query)
print("\n***********New LlamaParse+ Recursive Retriever Query Engine***********", response_2, sep="\n")


***********New LlamaParse+ Basic Query Engine***********
The specifications for a PM10 are as follows:
- Wavelength Range: 0.25 to 3 µm
- Power Range: 10 mW to 10 W
- Maximum Intermittent Power (<5 min.): 15 W
- Resolution: 1 mW
- Maximum Power Density: 50 W/cm2
- Maximum Energy Density: 2 J/cm2 at 1064 nm, 10 ns
- Response Time: 3 sec.
- Detector Coating: Volume Absorbing
- Active Area Diameter: 19 mm
- Calibration Uncertainty: ±1%
- Calibration Wavelength: 514 nm
- Cooling Method: Air-cooled
- Cable Type: PM DB-25
- Cable Length: 2 m
- Part Number: 1098338
[1;3;38;2;11;159;203mRetrieval entering id_c6bdc0b3-387b-4b48-848a-8e08dc3e64cb_453_table: TextNode
[0m[1;3;38;2;237;90;200mRetrieving from object TextNode with query What are the specs for a PM10?
[0m[1;3;38;2;11;159;203mRetrieval entering id_c6bdc0b3-387b-4b48-848a-8e08dc3e64cb_447_table: TextNode
[0m[1;3;38;2;237;90;200mRetrieving from object TextNode with query What are the specs for a PM10?
[0m[1;3;38;2;11;159;203mRe

In [27]:
# Ask both engines the same question and print the answers one after the other.
query = "Using the power sensor spec tables, which would be more appropriate for measuring a 5 Watt laser: A PowerMax-USB UV/VIS or a PM10?"

# Baseline engine (raw index).
response_1 = raw_query_engine.query(query)
print("\n***********New LlamaParse+ Basic Query Engine***********", response_1, sep="\n")

# Recursive engine (element-parser nodes and objects).
response_2 = recursive_query_engine.query(query)
print("\n***********New LlamaParse+ Recursive Retriever Query Engine***********", response_2, sep="\n")


***********New LlamaParse+ Basic Query Engine***********
A PowerMax-USB UV/VIS sensor would be more appropriate for measuring a 5 Watt laser.
[1;3;38;2;11;159;203mRetrieval entering id_c6bdc0b3-387b-4b48-848a-8e08dc3e64cb_249_table: TextNode
[0m[1;3;38;2;237;90;200mRetrieving from object TextNode with query Using the power sensor spec tables, which would be more appropriate for measuring a 5 Watt laser: A PowerMax-USB UV/VIS or a PM10?
[0m
***********New LlamaParse+ Recursive Retriever Query Engine***********
PowerMax-USB UV/VIS would be more appropriate for measuring a 5 Watt laser.


In [29]:
# Ask both engines the same question and print the answers one after the other.
query = "Using the power sensor spec tables, why would you think that a PowerMax-USB UV/VIS is more appropriate for measuring a 5 Watt laser than a PM10?"

# Baseline engine (raw index).
response_1 = raw_query_engine.query(query)
print("\n***********New LlamaParse+ Basic Query Engine***********", response_1, sep="\n")

# Recursive engine (element-parser nodes and objects).
response_2 = recursive_query_engine.query(query)
print("\n***********New LlamaParse+ Recursive Retriever Query Engine***********", response_2, sep="\n")


***********New LlamaParse+ Basic Query Engine***********
The PowerMax-USB UV/VIS sensor would be more appropriate for measuring a 5 Watt laser than a PM10 because the PowerMax-USB UV/VIS sensor has a higher power range that includes the 5 Watt laser power level. The PowerMax-USB UV/VIS sensor is designed to measure power from 10 nW up to 50 mW, while the PM10 sensor is designed for a power range of 10 mW to 30 W, which does not cover the 5 Watt power level.
[1;3;38;2;11;159;203mRetrieval entering id_c6bdc0b3-387b-4b48-848a-8e08dc3e64cb_249_table: TextNode
[0m[1;3;38;2;237;90;200mRetrieving from object TextNode with query Using the power sensor spec tables, why would you think that a PowerMax-USB UV/VIS is more appropriate for measuring a 5 Watt laser than a PM10?
[0m
***********New LlamaParse+ Recursive Retriever Query Engine***********
The PowerMax-USB UV/VIS sensor is more appropriate for measuring a 5 Watt laser than a PM10 because the PowerMax-USB UV/VIS sensor has a power ran

In [30]:
# Ask both engines the same question and print the answers one after the other.
query = "What are the specs for the PowerMax-USB UV/VIS?"

# Baseline engine (raw index).
response_1 = raw_query_engine.query(query)
print("\n***********New LlamaParse+ Basic Query Engine***********", response_1, sep="\n")

# Recursive engine (element-parser nodes and objects).
response_2 = recursive_query_engine.query(query)
print("\n***********New LlamaParse+ Recursive Retriever Query Engine***********", response_2, sep="\n")


***********New LlamaParse+ Basic Query Engine***********
Wavelength Range: 325 nm to 1065 nm  
Power Range: 5 µW to >100 mW  
Noise Equivalent Power: 100 nW  
Maximum Power Density: 20 W/cm2  
Response Time: Speed-up On: 0.1 sec. (UV/VIS), 0.5 sec. (Wand UV/VIS)  
Detector Element: Silicon photodiode  
Detector Diameter: 10 mm (UV/VIS), 8 mm (Wand UV/VIS)  
Calibration Uncertainty: ±1%  
Power Linearity: ±1% (UV/VIS), Powe (Wand UV/VIS)  
Spectral Compensation Accuracy: ±4% (325 to 900 nm), ±5% (900 to 1065 nm)  
Calibration Wavelength: 514 nm  
Cooling Method: Air  
Cable Type: USB  
Cable Length: 2.5 m  
Part Numbers: 1168337 (UV/VIS), 1299161 (Wand UV/VIS)
[1;3;38;2;11;159;203mRetrieval entering id_c6bdc0b3-387b-4b48-848a-8e08dc3e64cb_249_table: TextNode
[0m[1;3;38;2;237;90;200mRetrieving from object TextNode with query What are the specs for the PowerMax-USB UV/VIS?
[0m[1;3;38;2;11;159;203mRetrieval entering id_c6bdc0b3-387b-4b48-848a-8e08dc3e64cb_219_table: TextNode
[0m[1;