# Setting Up the Environment
This section imports necessary libraries and sets up the environment for the notebook.

In [1]:
import nest_asyncio
from dotenv import load_dotenv, find_dotenv
import os

# Locate a .env file and load its variables into the process environment.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

# Patch asyncio so nested event loops work inside the notebook's running loop.
nest_asyncio.apply()

# Connecting to Qdrant
This section establishes a connection to the Qdrant vector database.

In [2]:
import qdrant_client

# Qdrant collection used by create_index() to hold the document vectors.
collection_name = "chat_with_docs_v2"

# Client for the Qdrant server on localhost, port 6333.
client = qdrant_client.QdrantClient(host="localhost", port=6333)

# Instrumentation Setup
This section sets up instrumentation for tracing and monitoring.

In [3]:
from openinference.instrumentation.llama_index import LlamaIndexInstrumentor
from phoenix.otel import register

# Register a Phoenix tracer provider, then attach the LlamaIndex instrumentor to it.
tracer_provider = register()
instrumentor = LlamaIndexInstrumentor()
instrumentor.instrument(tracer_provider=tracer_provider)

🔭 OpenTelemetry Tracing Details 🔭
|  Phoenix Project: default
|  Span Processor: SimpleSpanProcessor
|  Collector Endpoint: localhost:4317
|  Transport: gRPC
|  Transport Headers: {'user-agent': '****'}
|  
|  Using a default SpanProcessor. `add_span_processor` will overwrite this default.
|  
|  
|  `register` has set this TracerProvider as the global OpenTelemetry default.
|  To disable this behavior, call `register` with `set_global_tracer_provider=False`.



# Loading Documents
This section loads documents from the specified directory.

In [3]:
from llama_index.core import SimpleDirectoryReader

# Directory that is scanned for source documents.
input_dir_path = './docs'

# Read PDF files only, descending into sub-directories.
loader = SimpleDirectoryReader(
    input_dir=input_dir_path,
    recursive=True,
    required_exts=[".pdf"],
)
docs = loader.load_data()

# Checking Loaded Documents
This section checks the type and number of loaded documents.

In [4]:
# Sanity check: container type and number of loaded documents.
(type(docs), len(docs))

(list, 12)

# Creating a Vector Store Index
This section defines a function to create a vector store index using Qdrant.

In [5]:
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import VectorStoreIndex, ServiceContext, StorageContext

def create_index(documents):
    """Build a vector store index over ``documents`` backed by Qdrant.

    Relies on the notebook-level ``client`` and ``collection_name`` to
    construct the Qdrant vector store that the index writes to.

    Args:
        documents: Documents handed to ``VectorStoreIndex.from_documents``.

    Returns:
        The ``VectorStoreIndex`` created on top of the Qdrant vector store.
    """
    qdrant_store = QdrantVectorStore(collection_name=collection_name,
                                     client=client)
    qdrant_context = StorageContext.from_defaults(vector_store=qdrant_store)
    return VectorStoreIndex.from_documents(
        documents,
        storage_context=qdrant_context,
    )

# Setting Up Embedding Model
This section sets the HuggingFace embedding model as the LlamaIndex default and builds the index from the loaded documents.

In [None]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings

# NOTE(review): trust_remote_code=True permits code shipped in the model repo
# to run locally -- confirm this checkpoint actually needs it.
embed_model = HuggingFaceEmbedding(
    trust_remote_code=True,
    model_name="BAAI/bge-large-en-v1.5",
)

# Make this the default embedding model before the index is built.
Settings.embed_model = embed_model

index = create_index(docs)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/94.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/779 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]

I0000 00:00:1747466909.776390   37722 chttp2_transport.cc:1201] ipv6:%5B::1%5D:4317: Got goaway [11] err=UNAVAILABLE:GOAWAY received; Error code: 11; Debug Text: ping_timeout {grpc_status:14, http2_error:11, created_time:"2025-05-17T12:58:29.773682233+05:30"}
Transient error StatusCode.UNAVAILABLE encountered while exporting traces to localhost:4317, retrying in 1s.


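### Do Not Use Gemini/GCP
Alternative embedding setup kept for reference: this cell switches the embedding model to Google's `text-embedding-004` through Vertex AI and rebuilds the index. Skip it when using the HuggingFace embedding model above.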

In [7]:
from llama_index.embeddings.google_genai import GoogleGenAIEmbedding
from llama_index.core import Settings
from google.auth import default

# Google Cloud Application Default Credentials.
credentials, _ = default()

project = os.getenv("GOOGLE_CLOUD_PROJECT_ID")
# Use GOOGLE_CLOUD_LOCATION when it is set; otherwise fall back to the region
# that used to be hard-coded, so an unset variable behaves as before.
location = os.getenv("GOOGLE_CLOUD_LOCATION") or "us-central1"

embed_model = GoogleGenAIEmbedding(
    model_name="text-embedding-004",
    embed_batch_size=100,
    vertexai_config={
        "project": project,
        "location": location,
        "credentials": credentials,
    },
)

# Replace the default embedding model, then rebuild the index with it.
Settings.embed_model = embed_model

# NOTE(review): create_index() writes to the same Qdrant collection on every
# call; confirm the collection does not already hold vectors produced by a
# different embedding model before running this cell.
index = create_index(docs)

# Configuring LLM
This section configures the Vertex AI Gemini model as the default LLM for the query engine and sends a test message to confirm it responds.

In [8]:
# Example for Groq (commented out for now)
# from llama_index.llms.groq import Groq
# llm = Groq(model="gemma2-9b-it", request_timeout=120.0)

import os
import warnings

from google.auth import default
from llama_index.llms.vertex import Vertex
from llama_index.core.settings import Settings
from llama_index.core.base.llms.types import ChatMessage

# Suppress Vertex deprecation warnings (switch to GoogleGenAI soon).
# NOTE: this filter applies to every DeprecationWarning raised in the kernel,
# not only the ones coming from the Vertex integration.
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Google Cloud default credentials
credentials, _ = default()

# Region for Vertex AI. GOOGLE_CLOUD_PROJECT_REGION stays the primary variable;
# GOOGLE_CLOUD_LOCATION (the name the Gemini embedding cell reads) is used as a
# fallback so both cells agree when only one of the two is defined.
vertex_location = (
    os.getenv("GOOGLE_CLOUD_PROJECT_REGION")
    or os.getenv("GOOGLE_CLOUD_LOCATION")
)

# Initialize Vertex LLM
llm = Vertex(
    model="gemini-2.0-flash",
    temperature=0.7,
    credentials=credentials,
    project=os.getenv("GOOGLE_CLOUD_PROJECT_ID"),
    location=vertex_location,
)

# Set as default LLM for LlamaIndex
Settings.llm = llm

# Example chat call: a quick check that the model is reachable and responds.
response = llm.chat([
    ChatMessage(role="user", content="Hello there Gemini! How are you doing today?")
])
print(response)

assistant: Hello! I'm doing well, thank you for asking. I'm ready to assist you with whatever you need. How can I help you today?



# Defining a Prompt Template
This section defines a custom prompt template for the query engine.

In [9]:
from llama_index.core import PromptTemplate

# Custom question-answering prompt with two placeholders: {context_str} and
# {query_str}. The leading indentation on each line sits inside the
# triple-quoted string, so it is part of the prompt text.
template = """Context information is below:
              ---------------------
              {context_str}
              ---------------------
              Given the context information above I want you to think
              step by step to answer the query in a crisp manner,
              incase you don't know the answer say 'I don't know!'
            
              Query: {query_str}
        
              Answer:"""

# Wrap the raw string in a PromptTemplate object.
qa_prompt_tmpl = PromptTemplate(template)

# Setting Up Reranking
This section sets up a reranking mechanism for query results.

In [10]:
from llama_index.core.postprocessor import SentenceTransformerRerank

# Cross-encoder reranker configured with top_n=3.
rerank = SentenceTransformerRerank(
    top_n=3,
    model="cross-encoder/ms-marco-MiniLM-L-2-v2",
)

# Querying the Engine
This section demonstrates how to query the engine with a custom prompt and reranking.

In [13]:
# Query engine with similarity_top_k=10 and the reranker as a node postprocessor.
query_engine = index.as_query_engine(
    node_postprocessors=[rerank],
    similarity_top_k=10,
)

# Swap in the custom QA prompt for the response synthesizer.
prompt_overrides = {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
query_engine.update_prompts(prompt_overrides)

question = "What exactly is Activ One? and tell me all its benefits in detail"
response = query_engine.query(question)

# Displaying the Response
This section renders the query engine's response as Markdown.

In [14]:
from IPython.display import Markdown, display

# Render the answer text as Markdown in the cell output.
answer_markdown = Markdown(str(response))
display(answer_markdown)

Here's a breakdown of what Activ One is and its benefits, based on the provided context:

**What Activ One Is:**

*   Activ One is a health insurance plan offered by Aditya Birla Health Insurance Co. Limited.
*   It's designed to protect you from rising medical costs, acting as an "inflation-proof" plan.

**Benefits of Activ One (in detail):**

*   **Unlimited Coverage:** Aims to provide limitless coverage. This means no sub-limits, and base benefits are covered up to the Sum Insured.
*   **Increasing Sum Insured:** The "Super Credit" feature inflates the Sum Insured up to 6 times the original amount by the 6th year of the policy, regardless of claims. Optional feature Activ One NXT, 3X from 2nd renewal and onwards for Activ One VYTL
*   **Wide Range of Sum Insured:** Offers a range of Sum Insured options, from INR 5 lacs to INR 2 crores.
*   **Zone-Based Premium:** Premium is determined by the city of residence.
*   **No Maximum Entry Age:** There is no upper age limit to enroll in the plan.
*   **Age-Banded Premium:** Premium increases only after certain age bands, not every year.
*   **Covers Live-in Partners:** Now covers legally married spouse or live-in partner (same or opposite sex)
