In [None]:
#!pip install llama-index llama-index-packs-raptor llama-index-vector-stores-chroma chromadb llama-parse python-dotenv nest-asyncio

In [1]:
from llama_index.packs.raptor import RaptorPack

# optionally download the pack to inspect/modify it yourself!
# from llama_index.core.llama_pack import download_llama_pack
# RaptorPack = download_llama_pack("RaptorPack", "./raptor_pack")

In [2]:
# Load API credentials from environment variables (load_dotenv() also reads a
# local .env file when one exists).
import os
from dotenv import load_dotenv
load_dotenv()

openai_api_key = os.getenv('OPENAI_API_KEY')
llama_cloud_api_key = os.getenv('LLAMA_CLOUD_API_KEY')

# Surface a missing OpenAI key right here instead of deep inside the first
# embedding/LLM call. LLAMA_CLOUD_API_KEY is not checked: in this notebook it is
# only relevant to the LlamaParse loader, which is commented out.
if not openai_api_key:
    import warnings

    warnings.warn(
        "OPENAI_API_KEY is not set; the OpenAI embedding and LLM calls in this "
        "notebook will fail until it is added to the environment or a .env file."
    )

In [3]:
import nest_asyncio

# Patch asyncio so that event loops can be nested.
# NOTE(review): presumably required because the notebook kernel already runs an
# event loop while the RAPTOR pack drives async code during ingestion — confirm.
nest_asyncio.apply()

In [4]:
from llama_parse import LlamaParse
from pathlib import Path

In [5]:
# Relative path of the "catalog" directory that holds the source PDF.
data_dir = Path('catalog')

# Full path to the laser measurement and control catalog inside that directory.
file_path = data_dir.joinpath('laser-measurement-and-control-cat.pdf')

In [None]:
# Optional alternative: parse the PDF at file_path into markdown with LlamaParse
# documents = LlamaParse(result_type="markdown").load_data(file_path)

In [6]:
from llama_index.core import SimpleDirectoryReader

# Read just the single catalog PDF (an explicit file list, not a directory scan)
# and keep the loaded documents for ingestion.
pdf_reader = SimpleDirectoryReader(input_files=[file_path])
documents = pdf_reader.load_data()

In [7]:
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.vector_stores.chroma import ChromaVectorStore
import chromadb

# Persistent on-disk Chroma collection that stores the RAPTOR tree
# (leaf chunks plus the generated cluster summaries).
client = chromadb.PersistentClient(path="./catalog/catalog_db")
collection = client.get_or_create_collection("catalog")

vector_store = ChromaVectorStore(chroma_collection=collection)

# Keep this cell idempotent: the collection survives kernel restarts, so
# re-running the cell with the full document list would embed, summarize and
# insert the whole catalog a second time (duplicate nodes, repeated API cost).
# Ingest only when the collection is still empty; otherwise pass no documents
# and reuse what is already stored.
# To force a rebuild (e.g. after an interrupted ingestion), delete
# ./catalog/catalog_db and run the cell again.
documents_to_ingest = documents if collection.count() == 0 else []

raptor_pack = RaptorPack(
    documents_to_ingest,
    embed_model=OpenAIEmbedding(
        model="text-embedding-3-small"
    ),  # used for embedding clusters
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.1),  # used for generating summaries
    vector_store=vector_store,  # used for storage
    similarity_top_k=2,  # top k for each layer, or overall top-k for collapsed
    mode="collapsed",  # sets default mode
    transformations=[
        SentenceSplitter(chunk_size=400, chunk_overlap=50)
    ],  # transformations applied for ingestion
)

Generating embeddings for level 0.
Performing clustering for level 0.
Generating summaries for level 0 with 71 clusters.
Level 0 created summaries/clusters: 71
Generating embeddings for level 1.
Performing clustering for level 1.
Generating summaries for level 1 with 12 clusters.
Level 1 created summaries/clusters: 12
Generating embeddings for level 2.
Performing clustering for level 2.
Generating summaries for level 2 with 3 clusters.
Level 2 created summaries/clusters: 3


In [8]:
# Collapsed retrieval: similarity_top_k acts as one overall top-k.
# Show how many nodes came back and the text of the best match.
nodes = raptor_pack.run(
    "What are the specs for a PM10?",
    mode="collapsed",
)
print(len(nodes))
print(nodes[0].text)

2
PM100-19C PM150 PM150-50Power Range 300 mW to 150 WPowerMax - Laser Power Sensors
1/4-20 UNC
Female Mounting
Threads
51 mm
(2.0 in.)51 mm
(2.0 in.)
76 mm
(3.0 in.)89 mm
(3.5 in.)89 mm
(3.5 in.)
Ø19 mm
(0.75 in.)31 mm
(1.23 in.)
Note: Detector Surface
is 7.2 mm (0.28 in.) below
front face of aperture plateAdjustable
189-259 mm
(7.44-10.2 in.)3/8-16 UNC
Female Mounting ThreadsØ19 mm
(0.75 in.)
Ø151 mm
(5.95 in.)
Note: Detector Surface
is 32.3 mm (1.27 in.) below
front face of aperture plate3/8-16 UNC
Female Mounting
ThreadsØ50 mm
(1.97 in.)
Ø151 mm
(5.95 in.)
Note: Detector Surface
is 32.2 mm (1.27 in.) below
front face of aperture platePM150 and PM150-50 are our highest power convective air-cooled sensors. 
These sensor models are rated for continuous use up to 150W. The PM100-
19C, however, is a smaller sensor that can be operated air-cooled at 100W for 
up to 5 minutes before it must be allowed to cool down. 
Device  
Specifications
ISO/IEC 17025:2005
Power Sensors


In [9]:
# Same question, but retrieved level by level (tree traversal) instead of
# collapsed; print the node count and the first node's text for comparison.
nodes = raptor_pack.run("What are the specs for a PM10?", mode="tree_traversal")
print(len(nodes))
print(nodes[0].text)

Retrieved parent IDs from level 2: ['97d26d3e-eba8-4807-ab90-3650a51428a7', 'a4afe412-8904-44ca-beea-f4a67bcbac4f']
Retrieved 4 from parents at level 2.
Retrieved parent IDs from level 1: ['2d02a20f-07fa-446d-9691-77784a7f43ff', '6e56a094-8316-47e7-a2f3-e9cabc702b99']
Retrieved 4 from parents at level 1.
Retrieved parent IDs from level 0: ['d7b77226-5c4b-46c8-8fa9-c2997c6914d9', 'cd9f8b4a-8568-49d4-8580-b0cf45731f0a']
Retrieved 4 from parents at level 0.
4
)36 mm
(1.4 in.)
PM3 only
39 mm
(1.53 in.)
PM3Q onlyØ19 mm
(0.75 in.)
Ø22 mm
(0.89 in.)
Light Tube
PM3 only74 mm
(2.9 in.)
Light Tube
PM3 only
(removable)
Ø63 mm
(2.48 in.)
Adjustable
186 mm to 227 mm
(6.81 in. to 8.96 in.)
51 mm
(2.0 in.)
76 mm
(3.0 in.)Note: Detector Surface
is 11 mm (0.44 in.) below
front face of aperture plat eAdjustable
198-252 mm
(7.79-9.97 in.)Ø83.8 mm
(3.3 in.)Ø10 mm
(0.39 in.)
Ø12.2 mm
(0.48 in.)76.4 mm
(3.01 in.)
48.5 mm
(1.91 in.)
51 mm
(2.0 in.)51 mm
(2.0 in.)
76 mm
(3.0 in.)2X 1/4-20 UNC
Female Mounting


In [10]:
# Loading: the tree was saved to the vector store, so a retriever can be built
# on top of it again; the document list passed here is empty.
# NOTE(review): `retriever` is not referenced by the later cells visible in this
# notebook (the query engine is built from raptor_pack.retriever) — confirm
# whether that is intended.
from llama_index.packs.raptor import RaptorRetriever

retriever = RaptorRetriever(
    [],
    vector_store=vector_store,  # storage that already holds the tree
    embed_model=OpenAIEmbedding(model="text-embedding-3-small"),  # embeds clusters
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.1),  # writes summaries
    mode="tree_traversal",  # default retrieval mode for this retriever
    similarity_top_k=2,  # top k per layer, or overall top-k when collapsed
)

In [11]:
# Query engine built on the pack's own retriever (raptor_pack was created with
# mode="collapsed" as its default) and a gpt-3.5-turbo LLM.
from llama_index.core.query_engine import RetrieverQueryEngine

query_engine = RetrieverQueryEngine.from_args(
    raptor_pack.retriever,
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.1),
)

In [12]:
# Ask the PM10 spec question through the query engine built on raptor_pack.retriever.
response = query_engine.query("What are the specs for a PM10?")

In [13]:
# Show the answer text (print() applies str() to the response itself).
# NOTE(review): the recorded answer quotes a 300 mW to 150 W power range, which
# in the recorded collapsed-retrieval output belongs to the PM100-19C/PM150
# chunk — verify the PM10 figures against the catalog before relying on them.
print(response)

The specs for a PM10 are as follows: It has a power range of 300 mW to 150 W, with a female mounting thread size of 1/4-20 UNC. The sensor has dimensions of 51 mm (2.0 in.) by 76 mm (3.0 in.) and a detector surface located 7.14 mm (0.28 in.) below the front face of the aperture plate. The PM10 sensor requires a +5VDC power input.


In [16]:
# Open-ended recommendation query: ask for five sensor models suited to a 5 W laser.
response = query_engine.query("I want you to search for specific power sensor models that could be used to measure a 5W laser. Give me the five best options you can find.")

In [17]:
# Show the recommended models (print() applies str() to the response itself).
print(response)

LM-10, PM3, PM2, PM10, PM30
