In [8]:
!pip install -qU \
    google-cloud-aiplatform \
    google-cloud-storage \
    llama-index \
    llama-index-embeddings-vertex \
    llama-index-llms-vertex \
    llama-index-vector_stores-vertexaivectorsearch \
    llama-index-llms-fireworks \
    llama-index-embeddings-huggingface \
    openpyxl \
    pandas

In [53]:
import IPython

app = IPython.Application.instance()
app.kernel.do_shutdown(True)

{'status': 'ok', 'restart': True}

In [None]:
# Imports
import os
import logging
import re 

from google.cloud import aiplatform, storage
from llama_index.core import (
    Document,
    PromptTemplate,
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    SummaryIndex,
    VectorStoreIndex,
)
from llama_index.core.agent import ReActAgent
from llama_index.core.base.base_query_engine import BaseQueryEngine
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.objects import ObjectIndex
from llama_index.core.prompts import LangchainPromptTemplate
from llama_index.core.prompts.base import BasePromptTemplate
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.embeddings.vertex import VertexTextEmbedding
from llama_index.llms.vertex import Vertex
from llama_index.vector_stores.vertexaivectorsearch import VertexAIVectorStore

In [None]:
from llama_index.llms.vertex import Vertex

In [3]:
PROJECT_ID = ""  
LOCATION = ""  

import vertexai

vertexai.init(project=PROJECT_ID, location=LOCATION)

In [4]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

2025-02-09 10:40:34.109522: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-02-09 10:40:35.471165: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/nccl2/lib:/usr/local/cuda/extras/CUPTI/lib64:/usr/lib/x86_64-linux-gnu/:/opt/conda/lib
2025-02-09 10:40:35.471356: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local

In [5]:
llm = Vertex("gemini-pro")

Settings.embed_model = embed_model
Settings.llm = llm

## Medical Devices 

In [6]:
import pandas as pd
import json
from llama_index.core import Document
import os

# Define file paths
excel_file = "data/Defribilators-Only.xlsx"
json_dir = "data"
json_file = os.path.join(json_dir, "excel_sheets_data.json")

# Ensure the directory exists
os.makedirs(json_dir, exist_ok=True)

# Read the Excel file
df_dict = pd.read_excel(excel_file, sheet_name=None)

json_data = {}

for sheet_name, sheet_df in df_dict.items():
    sheet_records = sheet_df.to_dict(orient="records")
    json_data[sheet_name] = sheet_records

    print(f"Sheet: {sheet_name}")
    if len(sheet_records) > 0:
        sample_records = sheet_records[:3]
        print(json.dumps(sample_records, indent=2))
    else:
        print("  No records found.")
    
    print(f"  Total records in '{sheet_name}': {len(sheet_records)}\n")

# Save JSON data in "medical_devices/data/excel_sheets_data.json"
with open(json_file, "w", encoding="utf-8") as f:
    json.dump(json_data, f, indent=2, ensure_ascii=False)

documents = []
for sheet_name, sheet_records in json_data.items():
    json_str = json.dumps(sheet_records, ensure_ascii=False)
    doc = Document(text=json_str, metadata={"sheet_name": sheet_name})
    documents.append(doc)

print(f"\nCreated {len(documents)} LlamaIndex Documents.")
print(f"JSON data saved to: {json_file}")


Sheet: Defibilators 
[
  {
    "Product Title": "Defibrillator Auotmatic ZOLL\u00ae X Series ",
    "Price": 27850.0,
    "Manufacturer_code": "603-0220001-01",
    "Model": 1179643,
    "Brand": "Zoll\u00ae X Series",
    "Manufacturer_code.1": "603-0220001-01",
    "Brand.1": "Zoll\u00ae X Series",
    "Manufacturer": "Zoll Medical",
    "Country of Origin": "Unknown",
    "Application": "Defibrillator",
    "Charge Time": "7 Seconds",
    "Contact Type": "Pads",
    "Dimensions": "8.9 X 10.4 Inch",
    "Display Type": "LCD Display",
    "Energy Output": "1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 30, 50, 70, 85, 100, 120, 150, 200 joules",
    "Operation Type": "Auotmatic",
    "Power Source": "AC Power / Battery Operated",
    "Prompt Type": "Unknown",
    "UNSPSC Code": 42172101,
    "Weight": "11.7 lbs.",
    "images": NaN,
    "features": "At less than 12 pounds/6 kilograms, the X Series is up to half the size and weight of other full-featured devices, Its large, bright screen allow

In [8]:
# 1. Import VectorStoreIndex from llama_index
from llama_index.core import VectorStoreIndex

# 2. Build an index from your existing 'documents' list
index = VectorStoreIndex.from_documents(documents)

# 3. Create a query engine
query_engine = index.as_query_engine()

# 4. Ask a sample question
query = "Which products are from the USA and have a charge time under 2 hours?"
response = query_engine.query(query)

print("Query:", query)
print("Answer:", response.response)


Query: Which products are from the USA and have a charge time under 2 hours?
Answer: The following products are from the USA and have a charge time under 2 hours:

* Refurbished AED Automatic Philips Pads
* Refurbished AED Unit Semi-Automatic Powerheart® AED G3 Electrode Pads Contact
* AED Unit Automatic Zoll®


In [9]:
# 4. Ask a sample question
query = "Which products cost under $200 and are made in the USA?"
response = query_engine.query(query)

print("Query:", query)
print("Answer:", response.response)
print("—"*60)

# 4. Ask a sample question
query = "Which defibrillators have a charge time of 1 hour or less and weigh under 2 lbs?"
response = query_engine.query(query)

print("Query:", query)
print("Answer:", response.response)
print("—"*60)

# 4. Ask a sample question
query = "Which items are labeled with FSA Eligible - Sell UOM and come from Brand XYZ?"
response = query_engine.query(query)

print("Query:", query)
print("Answer:", response.response)
print("—"*60)

# 4. Ask a sample question
query = "Which products have an Operation Type of ‘Automatic’ and come from Manufacturer ABC?"
response = query_engine.query(query)

print("Query:", query)
print("Answer:", response.response)
print("—"*60)

# 4. Ask a sample question
query = "List all defibrillators with a display type of ‘LCD’ and an energy output of at least 200 Joules."
response = query_engine.query(query)

print("Query:", query)
print("Answer:", response.response)
print("—"*60)


Query: Which products cost under $200 and are made in the USA?
Answer: I am sorry, but based on the provided context, there are no products that cost under $200 and are made in the USA. The context only provides information about defibrillators, and none of them are listed as being made in the USA. Additionally, the context does not provide any information about the price of the defibrillators.
————————————————————————————————————————————————————————————
Query: Which defibrillators have a charge time of 1 hour or less and weigh under 2 lbs?
Answer: ## Defibrillators with Charge Time of 1 Hour or Less and Weight Under 2 lbs

Based on the provided context, there are no defibrillators that meet both criteria of having a charge time of 1 hour or less and weighing under 2 lbs. 

Here's a breakdown of the defibrillators listed:

* **Refurbished Defibrillator Unit Automatic Philips HeartStart® XL Paddles Contact:** Charge time of 3 seconds, weight of 14 lbs.
* **AED Unit Automatic AED Plus®:*

## ReACT

In [10]:
# ========= Build ReAct agent on top of your existing documents ==========

# 1. Build a vector index from documents
from llama_index.core import VectorStoreIndex


index = VectorStoreIndex.from_documents(documents)

# (Optional) Persist the index so you can load it later without rebuilding
index.storage_context.persist(persist_dir="./storage/defibrillators_index")

In [11]:
# 2. Create a QueryEngine with top_k=3
query_engine = index.as_query_engine(similarity_top_k=3)

# 3. Wrap the QueryEngine in a QueryEngineTool
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.agent import ReActAgent

query_engine_tool = QueryEngineTool(
    query_engine=query_engine,
    metadata=ToolMetadata(
        name="defibrillator_data",
        description=(
            "Provides information about defibrillator product data, "
            "including Price, Brand, Manufacturer Code, Model, Country of Origin, etc. "
        ),
    ),
)

In [12]:

# 4. Initialize ReActAgent with a single tool and your desired LLM

agent = ReActAgent.from_tools(
    [query_engine_tool],
    llm=llm,
    verbose=True
)

In [13]:

# 5. Ask a question
response = agent.chat("Which products are from the USA and have a charge time under 2 hours?")
print("User Query:", "Which products are from the USA and have a charge time under 2 hours?")
print("Agent Response:", str(response))

print("------------------------------------------------------------")


> Running step 6c545375-ee32-4466-bcb1-05721e83d811. Step input: Which products are from the USA and have a charge time under 2 hours?
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: defibrillator_data
Action Input: {'input': 'Which products are from the USA and have a charge time under 2 hours?'}
[0m[1;3;34mObservation: ## Products from the USA with a charge time under 2 hours:

Based on the provided context, the following products are from the USA and have a charge time under 2 hours:

* **Refurbished AED Automatic Philips Pads:** This product is from the USA and has a charge time of 20 seconds.
* **Refurbished AED Unit Semi-Automatic AED Plus® Electrode Pads Contact:** This product is from the USA and has a charge time of 10 seconds.
* **AED Unit Automatic Zoll®:** This product is from the USA and has a charge time of 10 seconds.

**Note:** The context does not provide information about the charge t

In [14]:

# 5. Ask a question
response = agent.chat("List all defibrillators with a display type of ‘LCD’ and an energy output of at least 200 Joules.")
print("User Query:", "List all defibrillators with a display type of ‘LCD’ and an energy output of at least 200 Joules.")
print("Agent Response:", str(response))

print("------------------------------------------------------------")


> Running step e93d3ccb-b6a0-446a-a2d0-e55c0d26d212. Step input: List all defibrillators with a display type of ‘LCD’ and an energy output of at least 200 Joules.
[1;3;34mObservation: Error: Could not parse output. Please follow the thought-action-input format. Try again.
[0m> Running step 9ccf3bd0-a6fb-4d34-88b9-0c2cb262b64c. Step input: None
[1;3;34mObservation: Error: Could not parse output. Please follow the thought-action-input format. Try again.
[0m> Running step 34ee68fc-3d60-4388-b339-04ffdbda1185. Step input: None
[1;3;34mObservation: Error: Could not parse output. Please follow the thought-action-input format. Try again.
[0m> Running step 3245c2ac-63f2-49b9-80d1-d40ac7e04319. Step input: None
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: defibrillator_data
Action Input: {'input': 'List all defibrillators with a display type of ‘LCD’ and an energy output of at least 200 Joules.'}
[0m[1