In [1]:
# Access the API through environment variable
import os
#from dotenv import load_dotenv
#load_dotenv()

openai_api_key = os.getenv('OPENAI_API_KEY')

In [2]:
import nest_asyncio
nest_asyncio.apply()

In [3]:
import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

from llama_index.core import VectorStoreIndex, SQLDatabase
from llama_index.readers.wikipedia import WikipediaReader

In [4]:
from sqlalchemy import (
    create_engine,
    MetaData,
    ForeignKey,
    Table,
    Column,
    String,
    Integer,
    select,
    column,
)

from sqlalchemy.orm import declarative_base, relationship

In [5]:
engine = create_engine("sqlite:///:memory:", future=True)
metadata_obj = MetaData()

In [6]:
# create city SQL table
table_name = "product"
product_table = Table(
    table_name,
    metadata_obj,
    Column("sku", Integer, primary_key=True),
    Column("name", String(100), nullable=False),
    Column("product_category", String(30), nullable=False),
    Column("measurement_type", String(50), nullable=False),
    Column("cable_type", String(30), nullable=False),
    extend_existing=True,
)

table_name = "specs"
specs_table = Table(
    table_name,
    metadata_obj,
    Column("id", Integer, primary_key=True),  # Auto-incrementing ID
    Column("sku", Integer, ForeignKey("product.sku")),
    Column("spec_value", String(200)),
    Column("spec_name", String(200)),
    extend_existing=True,
)

metadata_obj.create_all(engine)

In [7]:
# print tables
metadata_obj.tables.keys()

dict_keys(['product', 'specs'])

In [8]:
import pandas as pd
from sqlalchemy import create_engine
from pathlib import Path

# Assuming 'engine' is already created and connected to your in-memory SQLite database

# Define the paths to your Excel files
base_path = Path('lmc_specs')  # Adjust this if your notebook's working directory is different
product_table_path = base_path / 'product_table.xlsx'
spec_table_path = base_path / 'spec_table.xlsx'

# Read the Excel files into DataFrames
product_df = pd.read_excel(product_table_path)
spec_df = pd.read_excel(spec_table_path)

# Load the DataFrames into the corresponding tables in the database
# If the tables already exist, append the data. Ensure you're using the same 'engine' as before
product_df.to_sql('product', con=engine, if_exists='append', index=False, method='multi')
spec_df.to_sql('specs', con=engine, if_exists='append', index=False, method='multi')

print("Data loaded into the database successfully.")


Data loaded into the database successfully.


In [9]:
sql_database = SQLDatabase(engine, include_tables=["product", "specs"])

In [10]:
from llama_index.core.query_engine import NLSQLTableQueryEngine

In [11]:
from llama_index.llms.openai import OpenAI
llm = OpenAI(temperature=0.1, model="gpt-3.5-turbo")

In [12]:
sql_query_engine = NLSQLTableQueryEngine(
    sql_database=sql_database,
    tables=["product", "specs"],
    llm=llm
)

In [14]:
query_str = "What are the specs for a PM10?"
response = sql_query_engine.query(query_str)

INFO:llama_index.core.indices.struct_store.sql_retriever:> Table desc str: Table 'product' has columns: sku (INTEGER), name (VARCHAR(100)), product_category (VARCHAR(30)), measurement_type (VARCHAR(50)), cable_type (VARCHAR(30)), and foreign keys: .

Table 'specs' has columns: id (INTEGER), sku (INTEGER), spec_value (VARCHAR(200)), spec_name (VARCHAR(200)), and foreign keys: ['sku'] -> product.['sku'].
> Table desc str: Table 'product' has columns: sku (INTEGER), name (VARCHAR(100)), product_category (VARCHAR(30)), measurement_type (VARCHAR(50)), cable_type (VARCHAR(30)), and foreign keys: .

Table 'specs' has columns: id (INTEGER), sku (INTEGER), spec_value (VARCHAR(200)), spec_name (VARCHAR(200)), and foreign keys: ['sku'] -> product.['sku'].
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "

In [15]:
from IPython.display import Markdown, display
display(Markdown(f"{response}"))

The specs for a PM10 laser include a maximum energy density of 600 millijoules per square centimeter, a minimum wavelength of 250 nanometers, a maximum wavelength of 11000 nanometers, a minimum power of 0.01 watts, a maximum power of 10 watts, and a cooling type of air. Other specifications include a wavelength range of 0.25 to 11 micrometers, a power range of 10 milliwatts to 10 watts, and a maximum intermittent power of 30 watts for less than 5 minutes. The PM10 also has a resolution of 1 milliwatt, a maximum power density of 6 kilowatts per square centimeter, and a detector coating of broadband. Additionally, it has a calibration uncertainty of ±1%, power linearity of ±1%, and spectral compensation accuracy of ±1.5%. The PM10 is compatible with meters such as LabMax-Pro SSIM, LabMax Touch, LabMax Touch Pro, and FieldMaxII-TOP.

## Generate pdf doc vector index using Llamaparse

In [None]:
# Uncomment if you are in a Jupyter Notebook
import nest_asyncio
import os
nest_asyncio.apply()

from llama_parse import LlamaParse
from llama_index.core import SimpleDirectoryReader

parser = LlamaParse(
    api_key=os.getenv('LLAMA_CLOUD_API_KEY'),  # can also be set in your env as LLAMA_CLOUD_API_KEY
    result_type="markdown"  # "markdown" and "text" are available
)

file_extractor = {".pdf": parser}
reader = SimpleDirectoryReader("./lmc_docs", file_extractor=file_extractor)
documents = reader.load_data()

In [17]:
from llama_index.core.node_parser import MarkdownElementNodeParser

In [18]:
#Splits a markdown document into Text Nodes and Index Nodes corresponding to embedded objects (e.g. tables).
node_parser = MarkdownElementNodeParser(llm=OpenAI(model="gpt-3.5-turbo-0125"))

In [None]:
nodes = node_parser.get_nodes_from_documents(documents)

In [20]:
base_nodes, objects = node_parser.get_nodes_and_objects(nodes)

In [21]:
recursive_index = VectorStoreIndex(nodes=base_nodes+objects)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [22]:
import os.path
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
)
persist_dir = "./lmc_data"

# store index for later
recursive_index.storage_context.persist(persist_dir=persist_dir)

In [None]:
# load the index
storage_context = StorageContext.from_defaults(persist_dir=persist_dir)

recursive_index = load_index_from_storage(storage_context)

In [24]:
from llama_index.postprocessor.flag_embedding_reranker import FlagEmbeddingReranker

reranker = FlagEmbeddingReranker(
    top_n=5,
    model="BAAI/bge-reranker-large",
)

recursive_query_engine = recursive_index.as_query_engine(
    similarity_top_k=15, 
    node_postprocessors=[reranker], 
    verbose=True
)

INFO:datasets:PyTorch version 2.1.0 available.
PyTorch version 2.1.0 available.


tokenizer_config.json:   0%|          | 0.00/443 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/279 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/801 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

## Define Query Engines, Set as Tools

In [46]:
from llama_index.core.tools import QueryEngineTool


sql_tool = QueryEngineTool.from_defaults(
    query_engine=sql_query_engine,
    description=(
        "Useful for translating a natural language query into a SQL query over"
        " a table containing: sku or part number, product name, spec_value, spec_name, cable_type,"
        " product_category, and measurement_category for laser measurement products."
        "Query this table for power sensor and energy sensor specs"
        "Whenever needing to select a product."
    ),
)

vector_tool = QueryEngineTool.from_defaults(
    query_engine=recursive_query_engine,
    description=(
        "Useful for answering semantic questions about laser measurement"
        " and general information about selecting laser measurement products."
        " This tool does not contain detailed product specs. Use the sql_tool"
        "to find specificiations for energy sensor, power sensor,"
        "and beam diagnostics product selection."
    ),
)

## Define Router Query Engine

In [47]:
from llama_index.core.query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector

query_engine = RouterQueryEngine(
    selector=LLMSingleSelector.from_defaults(),
    query_engine_tools=[
        sql_tool,
        vector_tool,
    ],
)

In [48]:
from llama_index.core.query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector, LLMMultiSelector
from llama_index.core.selectors import (
    PydanticMultiSelector,
    PydanticSingleSelector,
)

query_engine = RouterQueryEngine(
    selector=PydanticSingleSelector.from_defaults(),
    query_engine_tools=[
        sql_tool,
        vector_tool,
    ],
)

In [42]:
response = query_engine.query("How do I select a power sensor?")
print(str(response))

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:llama_index.core.query_engine.router_query_engine:Selecting query engine 1: The choice (2) is most relevant as it specifically mentions selecting a laser power sensor among other related items for laser measurement..
Selecting query engine 1: The choice (2) is most relevant as it specifically mentions selecting a laser power sensor among other related items for laser measurement..
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
[1;3;38;2;11;159;203mRetrieval entering id_6733a882-f8f3-4b67-819f-bae064693963_6_table: TextNode
[0m[1;3;38;2;237;90;200mRetrieving from object TextNode with query How do I select a power sensor?
[0m[1;3;38;2;11;159;203mRetrieval entering id_ae33e4dc-17de-467f-bc56-a9dd6b4ff

In [43]:
response = query_engine.query("What are several good power sensor options to measure a 5W laser with 10mm beam diameter?")
print(str(response))

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:llama_index.core.query_engine.router_query_engine:Selecting query engine 1: The choice is relevant as it specifically mentions selecting a laser power sensor, which is what is needed to measure the power of a 5W laser..
Selecting query engine 1: The choice is relevant as it specifically mentions selecting a laser power sensor, which is what is needed to measure the power of a 5W laser..
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
[1;3;38;2;11;159;203mRetrieval entering id_6733a882-f8f3-4b67-819f-bae064693963_6_table: TextNode
[0m[1;3;38;2;237;90;200mRetrieving from object TextNode with query What are several good power sensor options to measure a 5W laser with 10mm beam diameter?
[0m[1;3;38;2;11

## Pydantic Multi Selector

In [49]:
from llama_index.core.query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector, LLMMultiSelector
from llama_index.core.selectors import (
    PydanticMultiSelector,
    PydanticSingleSelector,
)

multi_query_engine = RouterQueryEngine(
    selector=PydanticMultiSelector.from_defaults(),
    query_engine_tools=[
        sql_tool,
        vector_tool,
    ],
)

In [50]:
response = multi_query_engine.query("What are several good power sensor options to measure a 5W laser with 10mm beam diameter?")
print(str(response))

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:llama_index.core.query_engine.router_query_engine:Selecting query engine 0: This choice is specifically focused on translating natural language queries into SQL queries for selecting laser measurement products, which includes power sensor and energy sensor specs. It mentions querying the table for power sensor and energy sensor specs, making it relevant to the question about power sensor options for measuring a 5W laser with a 10mm beam diameter..
Selecting query engine 0: This choice is specifically focused on translating natural language queries into SQL queries for selecting laser measurement products, which includes power sensor and energy sensor specs. It mentions querying the table for power sensor and energy sensor specs, making it relevant to the question about power sensor options for measuring a 5W lase