In [None]:
!pip install llama_index
!pip install python-dotenv

In [10]:
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import VectorStoreIndex, load_index_from_storage
from llama_index.core import Document
from llama_parse import LlamaParse
from llama_index.core import SimpleDirectoryReader
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import TokenTextSplitter
from llama_index.core import StorageContext, load_index_from_storage
from llama_index.core import PromptTemplate

import os 
from dotenv import load_dotenv
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Fail fast with a readable message when the key is missing. The previous
# self-assignment `os.environ[k] = os.getenv(k)` changed nothing when the key
# was present and raised an opaque TypeError (None is not a str) when it was not.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to a .env file next to this notebook "
        "or export it in the environment before starting the kernel."
    )


In [60]:
# Static description of the corpus. It is bound into the prompt once, below,
# because at query time the query engine only supplies `context_str` and
# `query_str`.
document_context = "clinical trial and detailed information about locations."

# Question-answering prompt handed to the query engine as `text_qa_template`.
#   {document_context} - corpus description, pre-filled via partial_format()
#   {context_str}      - retrieved text chunks, filled in by the query engine
#   {query_str}        - the user's question, filled in by the query engine
# The original template had no {context_str} placeholder, so the retrieved
# chunks never reached the LLM and answers could not be grounded in the data.
template = (
    "This document contains important information about {document_context}."
    "\n---------------------\n"
    "{context_str}"
    "\n---------------------\n"
    # Trailing space keeps this sentence from running into the next literal.
    "Based on the provided document, your input query will be answered using the most relevant information. "
    "consider community titles as hospital names."
    "\n---------------------\n"
    "Document Context: {document_context}\n"
    "User Query: {query_str}\n"
    "Response:\n"
)

# partial_format() returns a copy of the prompt with `document_context` already
# bound, leaving only the two variables the query engine knows how to supply.
qa_template_llamaindex = PromptTemplate(template).partial_format(
    document_context=document_context
)


In [62]:

# Read the files under ./clinicaltrial_data, decoding as UTF-8 with
# errors='ignore' passed through to the reader.
reader = SimpleDirectoryReader(
    "./clinicaltrial_data",
    encoding='utf-8',
    errors='ignore',
)
documents = reader.load_data()

# Two chunking stages applied in order: token-based, then sentence-based,
# both with their default settings.
splitters = [TokenTextSplitter(), SentenceSplitter()]
pipeline = IngestionPipeline(transformations=splitters)

# Run the loaded documents through the pipeline to obtain nodes.
nodes = pipeline.run(documents=documents)

In [63]:
# Build the vector index straight from the loaded documents, chunking them with
# a SentenceSplitter (chunk_size=1024, chunk_overlap=20).
# The former `document = [nodes]` line was dropped: it was never read, and it
# made this cell depend on `nodes` even though the index is built from
# `documents`.
index = VectorStoreIndex.from_documents(
    documents,
    transformations=[SentenceSplitter(chunk_size=1024, chunk_overlap=20)],
)
print(index)

<llama_index.core.indices.vector_store.base.VectorStoreIndex object at 0x0000020879054910>


In [64]:
# Tag the index with a fixed id, then write its storage context to disk so a
# later cell can reload it from ./clinicaltrial_01_index.
index.set_index_id("clinicaltrial_01")
index.storage_context.persist(persist_dir="./clinicaltrial_01_index")

In [65]:
# Rebuild the storage context from the directory the index was persisted to.
storage_context = StorageContext.from_defaults(persist_dir="./clinicaltrial_01_index")
# Reload the saved index by the id it was stored under.
index = load_index_from_storage(storage_context, index_id="clinicaltrial_01")

# The question lives in `query_str`. It used to be bound to `str` as well,
# which shadowed the builtin type for every cell run afterwards in the kernel.
query_str = "Tell me the hospital names which has therapeutic areas anti-infectives"

# Query engine that uses the custom QA prompt and retrieves the 5 most similar
# chunks for each question.
query_engine = index.as_query_engine(text_qa_template=qa_template_llamaindex, similarity_top_k=5)

# Run the question through the engine and show the synthesized answer.
response = query_engine.query(query_str)
print(response)

1. Community Hospital of the Monterey Peninsula - Anti-Infectives
2. Mercy Medical Center - Anti-Infectives
3. St. Joseph Hospital - Anti-Infectives
4. Memorial Hospital - Anti-Infectives
5. Unity Hospital - Anti-Infectives


In [9]:
# Wrap the RAG query engine as a tool and let a ReAct agent decide when to call it.
# Requires `query_engine` from the cell that reloads the index.
from llama_index.core.tools import QueryEngineTool
from llama_index.core.agent import ReActAgent
from llama_index.llms.openai import OpenAI


llm = OpenAI(model="gpt-4o", temperature=0)

rag_tool = QueryEngineTool.from_defaults(
    query_engine,
    # Tool names should be identifier-like (letters, digits, underscores).
    # The previous name "Healthcare data" contained a space, which is not a
    # valid function-style tool name and is not reliably recovered from the
    # agent's "Action: <tool>" line, so the tool could not be dispatched.
    name="healthcare_data",
    # The agent selects tools from this text, so say what the index covers.
    description=(
        "Healthcare data: clinical trial documents with detailed information "
        "about locations. Use this tool to answer questions from that data."
    ),
)

# max_iterations caps the number of reasoning/tool steps per chat() call.
agent = ReActAgent.from_tools(
    [rag_tool], llm=llm, max_iterations=5, verbose=False
)
response = agent.chat(
    "Tell me about medicare and related medical condition"
)

print(response)


KeyboardInterrupt: 

In [None]:
# Interactive chat loop around `agent` (defined in the agent cell).
# Type "exit" (any case, surrounding whitespace ignored) to leave the loop.
print("I am a Pinsonandtang chatbot. You can ask me questions about ICD-10 CM 2025 Guidelines, pinsonandtang.com products, CDI+ Data, Cynthia's Webinar")
print("=====================================================================================================================================")
while True:
    try:
        text_input = input("User: ").strip()
    except (EOFError, KeyboardInterrupt):
        # End-of-input or a kernel interrupt at the prompt ends the session
        # cleanly instead of surfacing a traceback.
        print()
        break
    if not text_input:
        # Don't spend an LLM call on an empty line; just prompt again.
        continue
    if text_input.lower() == "exit":
        break
    response = agent.chat(text_input)
    # ANSI escape codes render the reply in blue, then reset the colour.
    print(f"CDI+ Chatbot: \033[94m{response}\033[0m")