In [1]:
# Cell 1: Imports & Environment Setup
import os
import time
import json
import boto3
from dotenv import load_dotenv

# LangChain & AI Libraries
from langchain_docling import DoclingLoader
from langchain_docling.loader import ExportType
from langchain_text_splitters import MarkdownHeaderTextSplitter, RecursiveCharacterTextSplitter
from langchain_aws import BedrockEmbeddings, ChatBedrock
from langchain_pinecone import PineconeVectorStore
from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from operator import itemgetter

# Pinecone & Evaluation
from pinecone import Pinecone, ServerlessSpec
from pinecone_text.sparse import BM25Encoder
from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy, context_precision
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.run_config import RunConfig
from datasets import Dataset

# Load Environment Variables
load_dotenv(override=True)

print("‚úÖ Libraries loaded. Environment verified.")

  from .autonotebook import tqdm as notebook_tqdm


‚úÖ Libraries loaded. Environment verified.


In [2]:
# Cell 2: Smart Initialization & Duplicate Check
# Configuration
file_path = "SBIhomeinsurance_home.pdf" # Make sure this matches your file name
index_name = "sbi-home-insurance-rag-hybrid" # Using your existing hybrid index name

# 1. Connect to Pinecone
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# 2. Check if Index Exists
existing_indexes = [index.name for index in pc.list_indexes()]

if index_name not in existing_indexes:
    print(f"‚ö†Ô∏è Index '{index_name}' not found. Creating it...")
    pc.create_index(
        name=index_name,
        dimension=1024, # Titan v2
        metric="dotproduct", # Required for Hybrid
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )
    time.sleep(20) # Wait for init
    print("‚úÖ Index created successfully.")
else:
    print(f"‚úÖ Index '{index_name}' already exists.")

# 3. Connect to the Index
index = pc.Index(index_name)

# 4. Check if File is Already Ingested (The "Smart" Check)
# We perform a dummy query filtering by this specific source file
print(f"üîç Checking if '{file_path}' is already in the database...")

# We use a dummy vector just to trigger the metadata filter
dummy_vector = [0.0] * 1024 
check_response = index.query(
    vector=dummy_vector,
    top_k=1,
    filter={"source": file_path},
    include_metadata=False
)

if len(check_response['matches']) > 0:
    print(f"‚úÖ File '{file_path}' detected in Pinecone.")
    print("üöÄ SKIPPING Docling & Embeddings to save cost.")
    should_ingest = False
else:
    print(f"‚ö†Ô∏è File '{file_path}' NOT found in Pinecone.")
    print("‚öôÔ∏è Proceeding with Ingestion...")
    should_ingest = True

‚úÖ Index 'sbi-home-insurance-rag-hybrid' already exists.
üîç Checking if 'SBIhomeinsurance_home.pdf' is already in the database...
‚úÖ File 'SBIhomeinsurance_home.pdf' detected in Pinecone.
üöÄ SKIPPING Docling & Embeddings to save cost.


In [3]:
# Cell 3: Load & Chunk (Conditional)
final_chunks = []

if should_ingest:
    print(f"üìÑ Starting Docling processing for {file_path}...")
    
    # A. Load with Docling (Export to Markdown)
    loader = DoclingLoader(
        file_path=file_path,
        export_type=ExportType.MARKDOWN
    )
    docs = loader.load()
    print("‚úÖ PDF Loaded via Docling.")

    # B. Split by Headers (Level 1)
    headers_to_split_on = [
        ("#", "Header 1"),
        ("##", "Header 2"),
        ("###", "Header 3"),
    ]
    markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
    md_header_splits = markdown_splitter.split_text(docs[0].page_content)
    
    # C. Split by Size (Level 2)
    chunk_size = 1000
    chunk_overlap = 200
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, 
        chunk_overlap=chunk_overlap
    )
    final_chunks = text_splitter.split_documents(md_header_splits)

    # D. Add Metadata Tags (Crucial for Smart Indexing)
    for chunk in final_chunks:
        chunk.metadata["source"] = file_path # Used for filtering later
        # We also keep the 'text' in metadata for Hybrid retrieval
        chunk.metadata["text"] = chunk.page_content 
    
    print(f"‚úÖ Chunking Complete. Created {len(final_chunks)} chunks.")
    print("Sample Metadata:", final_chunks[0].metadata)

else:
    print("‚è≠Ô∏è Skipping Loading & Chunking (Data already exists).")

‚è≠Ô∏è Skipping Loading & Chunking (Data already exists).


In [4]:
# Cell 4: Hybrid Embedding & Upsert (Conditional)
import boto3
from langchain_aws import BedrockEmbeddings
from pinecone_text.sparse import BM25Encoder

# 1. Initialize AWS Bedrock Embeddings (Need this for both Ingestion AND Querying)
boto3_session = boto3.Session()
bedrock_client = boto3_session.client("bedrock-runtime", region_name="us-east-1")

embeddings = BedrockEmbeddings(
    model_id="amazon.titan-embed-text-v2:0",
    client=bedrock_client
)

# 2. Initialize BM25 Encoder
bm25 = BM25Encoder()
bm25_filename = "bm25_values.json"

if should_ingest:
    print("‚öôÔ∏è Generatings Embeddings & Upserting...")
    
    # A. Fit BM25 on the new text
    chunk_texts = [chunk.page_content for chunk in final_chunks]
    bm25.fit(chunk_texts)
    bm25.dump(bm25_filename) # Save for future use
    print("‚úÖ BM25 Encoder fitted and saved.")
    
    # B. Generate Vectors & Upsert
    vectors_to_upsert = []
    
    print(f"Generating vectors for {len(final_chunks)} chunks...")
    for i, chunk in enumerate(final_chunks):
        # 1. Dense Vector (Titan)
        dense_vec = embeddings.embed_query(chunk.page_content)
        
        # 2. Sparse Vector (BM25)
        sparse_vec = bm25.encode_documents(chunk.page_content)
        
        # 3. Create ID (Unique based on source + index)
        # We use a simple hash or index. Here index 'i' is fine for this run.
        # Ideally, hash the text to avoid dupes, but for now:
        vector_id = f"{file_path}_{i}"
        
        vectors_to_upsert.append({
            "id": vector_id,
            "values": dense_vec,
            "sparse_values": sparse_vec,
            "metadata": chunk.metadata # Includes 'source' and 'text'
        })
        
    # C. Batch Upsert to Pinecone
    batch_size = 50
    for i in range(0, len(vectors_to_upsert), batch_size):
        batch = vectors_to_upsert[i : i + batch_size]
        index.upsert(vectors=batch)
        print(f"   Uploaded batch {i} to {i+batch_size}")
        
    print("‚úÖ Ingestion Complete.")

else:
    # If we skipped ingestion, we MUST load the BM25 model from disk
    # so we can still run queries.
    if os.path.exists(bm25_filename):
        bm25.load(bm25_filename)
        print("‚úÖ Skipped Ingestion. Loaded existing BM25 params from file.")
    else:
        print("‚ö†Ô∏è Warning: BM25 file not found. You might need to re-ingest if retrieval fails.")

2025-12-10 23:31:10,834 - INFO - Found credentials in environment variables.


‚úÖ Skipped Ingestion. Loaded existing BM25 params from file.


In [5]:
# Cell 5: Setup Retrieval & Re-ranking Engines
from typing import List

# 1. Define the Bedrock Cohere Re-ranker Class
class BedrockCohereReranker:
    def __init__(self, region_name="us-east-1"):
        self.client = boto3.client("bedrock-runtime", region_name=region_name)
        self.model_id = "cohere.rerank-v3-5:0"

    def rerank(self, query: str, docs: List[str], top_n: int = 5):
        # Docs must be a list of strings for the API
        if not docs: return []
        
        request_body = {
            "query": query, 
            "documents": docs, 
            "top_n": top_n, 
            "api_version": 2
        }
        
        try:
            response = self.client.invoke_model(modelId=self.model_id, body=json.dumps(request_body))
            response_body = json.loads(response['body'].read())
            results = response_body.get("results", [])
            return results # Returns list of {'index': int, 'relevance_score': float}
        except Exception as e:
            print(f"‚ö†Ô∏è Rerank Error: {e}")
            # Fallback: return indices 0..top_n
            return [{"index": i, "relevance_score": 0.0} for i in range(min(len(docs), top_n))]

# Initialize the Reranker
reranker = BedrockCohereReranker()
print("‚úÖ Cohere Re-ranker Initialized.")

# 2. Define the "Intelligent Retrieval" Function
# This combines Hybrid Search (Pinecone) + Re-ranking (Cohere)
def intelligent_retrieval(query: str) -> str:
    print(f"üîé Searching for: '{query}'")
    
    # A. Hybrid Search in Pinecone (Top 25)
    dense_vec = embeddings.embed_query(query)
    # Note: If you want strict keyword matching, enable the line below:
    # sparse_vec = bm25.encode_queries(query) 
    
    results = index.query(
        vector=dense_vec,
        # sparse_vector=sparse_vec, # Uncomment if passing sparse values
        top_k=25,
        include_metadata=True
    )
    
    # Extract just the text from the matches
    raw_docs = [match['metadata']['text'] for match in results['matches']]
    
    if not raw_docs:
        return ""

    # B. Re-ranking (Filter 25 -> Top 5)
    rerank_results = reranker.rerank(query, raw_docs, top_n=5)
    
    # C. Format the Top 5 for the LLM
    top_docs_text = []
    for res in rerank_results:
        idx = res['index']
        top_docs_text.append(raw_docs[idx])
        
    return "\n\n".join(top_docs_text)

print("‚úÖ Retrieval Logic Defined.")

2025-12-10 23:31:26,790 - INFO - Found credentials in environment variables.


‚úÖ Cohere Re-ranker Initialized.
‚úÖ Retrieval Logic Defined.


In [34]:
# Cell 5.5: Initialize anthropic.claude-3-5-haiku Model

from langchain_aws import ChatBedrock

# We use the US Cross-Region Inference Profile for Llama 3.1
llm = ChatBedrock(
    model_id="us.anthropic.claude-3-5-haiku-20241022-v1:0",  ## us.meta.llama3-1-70b-instruct-v1:0
    client=bedrock_client, # We defined this client in Cell 4
    model_kwargs={"temperature": 0.1, "max_tokens": 512} # max_tokens": 2048
)

print("‚úÖ anthropic.claude-3-5-haiku Model Initialized.")

‚úÖ anthropic.claude-3-5-haiku Model Initialized.


In [35]:
# Cell 6: LLM Chain Setup
# 1. Define the Prompt
# We strictly tell the LLM to use ONLY the provided context.
prompt_template = """
You are an expert Insurance Assistant. Use the following pieces of retrieved context to answer the question.
If the answer is not in the context, just say that you don't know. Do not try to make up an answer.

CONTEXT:
{context}

QUESTION:
{question}

ANSWER:
"""

prompt = PromptTemplate(
    template=prompt_template, 
    input_variables=["context", "question"]
)

# 2. Define the Chain
# This pipeline does: Take Query -> Get Smart Context -> Format Prompt -> Run Llama 3 -> Parse String
rag_chain_final = (
    {
        "context": RunnableLambda(intelligent_retrieval), # Uses our Hybrid + Rerank function
        "question": RunnablePassthrough()
    }
    | prompt
    | llm
    | StrOutputParser()
)

print("‚úÖ RAG Chain (Production Ready) Created.")

# 3. Quick Sanity Check
# Let's run a simple test to make sure the chain flows correctly

test_q = "What specific exclusions apply to loss caused by Subsidence?"
#"What is the deductible for Personal Property?"
#"what is the name of company which provides this insurance? and give me address for this company. also give me contact details for this comapny"
#"from the document tell me in terms of payment what policys provide how much insurance back means in terms of money"


print(f"\nüß™ Sanity Check Query: '{test_q}'")
print("-" * 40)
print(rag_chain_final.invoke(test_q))

‚úÖ RAG Chain (Production Ready) Created.

üß™ Sanity Check Query: 'What specific exclusions apply to loss caused by Subsidence?'
----------------------------------------
üîé Searching for: 'What specific exclusions apply to loss caused by Subsidence?'


2025-12-11 00:01:54,959 - INFO - Successfully invoked model amazon.titan-embed-text-v2:0. ResponseMetadata: {'RequestId': '6da7269b-ae93-4dbb-8bac-fd18ea368bfe', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Wed, 10 Dec 2025 18:31:55 GMT', 'content-type': 'application/json', 'content-length': '43375', 'connection': 'keep-alive', 'x-amzn-requestid': '6da7269b-ae93-4dbb-8bac-fd18ea368bfe', 'x-amzn-bedrock-invocation-latency': '87', 'x-amzn-bedrock-input-token-count': '14'}, 'RetryAttempts': 0}
2025-12-11 00:01:57,215 - INFO - Using Bedrock Invoke API to generate response


Based on the provided context, for subsidence, the following exclusions apply:

Subsidence is excluded when caused by:
a. Normal cracking, settlement or bedding down of new structures
b. The settlement or movement of made up ground
c. Coastal or river erosion
d. Defective design or workmanship or use of defective materials
e. Demolition, construction, structural alterations or repair of any property
f. Groundworks or excavations

These exclusions are specifically listed under section 6 of the context, which covers "Subsidence of the land on which Your Home Buildings stands, Landslide, Rockslide".


In [13]:
# Cell 7: Final Evaluation Run
import pandas as pd

# 1. Define the Hard Questions
test_questions = [
    "What specific exclusions apply to loss caused by Subsidence?", 
    "What is the deductible for Personal Property?",
    "What are the specific exclusions for Riot, strikes, or malicious damages?"
]

print("üöÄ Running Final Evaluation on Test Set...")
print("-" * 50)

results = []

for q in test_questions:
    print(f"Asking: {q}")
    try:
        # Run the robust chain
        answer = rag_chain_final.invoke(q)
        
        # Save result
        results.append({
            "Question": q,
            "AI Answer": answer.strip(),
            "Status": "‚úÖ Success"
        })
    except Exception as e:
        results.append({
            "Question": q,
            "AI Answer": f"ERROR: {e}",
            "Status": "‚ùå Failed"
        })

# 2. Display Results in a Clean Table
df = pd.DataFrame(results)

print("\n" + "="*60)
print("üèÜ FINAL PROJECT ACCURACY REPORT")
print("="*60)

# Print full details for verification
for i, row in df.iterrows():
    print(f"\nQ{i+1}: {row['Question']}")
    print(f"A: {row['AI Answer']}")
    print("-" * 40)

üöÄ Running Final Evaluation on Test Set...
--------------------------------------------------
Asking: What specific exclusions apply to loss caused by Subsidence?
üîé Searching for: 'What specific exclusions apply to loss caused by Subsidence?'


2025-12-06 16:30:42,721 - INFO - Successfully invoked model amazon.titan-embed-text-v2:0. ResponseMetadata: {'RequestId': '80e646a4-7d00-42c3-b10f-33d176fed6c3', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Sat, 06 Dec 2025 11:00:23 GMT', 'content-type': 'application/json', 'content-length': '43375', 'connection': 'keep-alive', 'x-amzn-requestid': '80e646a4-7d00-42c3-b10f-33d176fed6c3', 'x-amzn-bedrock-invocation-latency': '95', 'x-amzn-bedrock-input-token-count': '14'}, 'RetryAttempts': 0}
2025-12-06 16:31:17,781 - INFO - Using Bedrock Invoke API to generate response


Asking: What is the deductible for Personal Property?
üîé Searching for: 'What is the deductible for Personal Property?'


2025-12-06 16:31:27,586 - INFO - Successfully invoked model amazon.titan-embed-text-v2:0. ResponseMetadata: {'RequestId': '0574e333-07bf-451a-a7a8-964937c2869c', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Sat, 06 Dec 2025 11:01:22 GMT', 'content-type': 'application/json', 'content-length': '43328', 'connection': 'keep-alive', 'x-amzn-requestid': '0574e333-07bf-451a-a7a8-964937c2869c', 'x-amzn-bedrock-invocation-latency': '78', 'x-amzn-bedrock-input-token-count': '9'}, 'RetryAttempts': 0}
2025-12-06 16:31:39,071 - INFO - Using Bedrock Invoke API to generate response


Asking: What are the specific exclusions for Riot, strikes, or malicious damages?
üîé Searching for: 'What are the specific exclusions for Riot, strikes, or malicious damages?'


2025-12-06 16:31:51,091 - INFO - Successfully invoked model amazon.titan-embed-text-v2:0. ResponseMetadata: {'RequestId': '5f507c34-7f0a-4b1f-896b-f02b37e2ddee', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Sat, 06 Dec 2025 11:01:42 GMT', 'content-type': 'application/json', 'content-length': '43437', 'connection': 'keep-alive', 'x-amzn-requestid': '5f507c34-7f0a-4b1f-896b-f02b37e2ddee', 'x-amzn-bedrock-invocation-latency': '73', 'x-amzn-bedrock-input-token-count': '16'}, 'RetryAttempts': 0}
2025-12-06 16:32:01,671 - INFO - Using Bedrock Invoke API to generate response



üèÜ FINAL PROJECT ACCURACY REPORT

Q1: What specific exclusions apply to loss caused by Subsidence?
A: According to the context, the specific exclusions that apply to loss caused by Subsidence of the land on which the home building stands are:

a. normal cracking, settlement or bedding down of new structures,
b. the settlement or movement of made up ground,
c. coastal or river erosion,
d. defective design or workmanship or use of defective materials, or demolition, construction, structural alterations or repair of any property, or groundworks or excavations.
----------------------------------------

Q2: What is the deductible for Personal Property?
A: The deductible for Personal Property is not explicitly mentioned in the context. However, it does mention deductibles for Jewellery & Valuables (5% of the claim amount subject to a minimum of Rs 2500) and portable equipment's (5% of claim amount subject to a minimum of Rs 1000).
----------------------------------------

Q3: What are the

In [14]:
# Cell 8: (Optional) Re-Run Ragas Metrics
# Note: This takes 1-2 minutes and costs a small amount of API usage.

from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy, context_precision
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.run_config import RunConfig
from datasets import Dataset

# 1. Prepare Data from the results you just generated
questions_list = [row['Question'] for row in results]
answers_list = [row['AI Answer'] for row in results]
ground_truths = [
    "Normal cracking, settlement of new structures, movement of made up ground, coastal erosion, defective design.",
    "The document does not state a specific deductible for 'Personal Property', only for Jewellery and Portables.",
    "Temporary or permanent dispossession by government order, or unlawful occupation by any person."
]

# We need to fetch the contexts again manually for Ragas
contexts_list = []
print("fetching contexts for evaluation...")
for q in questions_list:
    # Quick re-fetch of the text the LLM saw
    retrieved_text = intelligent_retrieval(q)
    contexts_list.append([retrieved_text])

data_samples = {
    "question": questions_list,
    "answer": answers_list,
    "contexts": contexts_list,
    "ground_truth": ground_truths
}

ragas_dataset = Dataset.from_dict(data_samples)

# 2. Configure Ragas with Safety Mode (Sequential)
ragas_llm = LangchainLLMWrapper(llm)
ragas_embeddings = LangchainEmbeddingsWrapper(embeddings)
safe_config = RunConfig(max_workers=1, timeout=120, max_retries=3)

# 3. Run
print("üë®‚Äç‚öñÔ∏è Calculating Final Scores...")
eval_results = evaluate(
    ragas_dataset,
    metrics=[faithfulness, answer_relevancy, context_precision],
    llm=ragas_llm,
    embeddings=ragas_embeddings,
    run_config=safe_config,
    raise_exceptions=False
)

print("\n" + "="*50)
print("üèÜ OFFICIAL RAGAS SCORECARD")
print("="*50)
print(eval_results)

fetching contexts for evaluation...
üîé Searching for: 'What specific exclusions apply to loss caused by Subsidence?'


2025-12-06 16:36:10,511 - INFO - Successfully invoked model amazon.titan-embed-text-v2:0. ResponseMetadata: {'RequestId': '9bbd495c-b7a9-4f53-8227-d8358966bec3', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Sat, 06 Dec 2025 11:05:29 GMT', 'content-type': 'application/json', 'content-length': '43375', 'connection': 'keep-alive', 'x-amzn-requestid': '9bbd495c-b7a9-4f53-8227-d8358966bec3', 'x-amzn-bedrock-invocation-latency': '79', 'x-amzn-bedrock-input-token-count': '14'}, 'RetryAttempts': 0}


üîé Searching for: 'What is the deductible for Personal Property?'


2025-12-06 16:36:20,926 - INFO - Successfully invoked model amazon.titan-embed-text-v2:0. ResponseMetadata: {'RequestId': '7df64f13-5668-44b7-aadc-8bc414676274', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Sat, 06 Dec 2025 11:06:17 GMT', 'content-type': 'application/json', 'content-length': '43328', 'connection': 'keep-alive', 'x-amzn-requestid': '7df64f13-5668-44b7-aadc-8bc414676274', 'x-amzn-bedrock-invocation-latency': '84', 'x-amzn-bedrock-input-token-count': '9'}, 'RetryAttempts': 0}


üîé Searching for: 'What are the specific exclusions for Riot, strikes, or malicious damages?'


2025-12-06 16:36:27,671 - INFO - Successfully invoked model amazon.titan-embed-text-v2:0. ResponseMetadata: {'RequestId': '45222c46-ecb4-40e1-b43b-e07e1dcd5813', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Sat, 06 Dec 2025 11:06:26 GMT', 'content-type': 'application/json', 'content-length': '43437', 'connection': 'keep-alive', 'x-amzn-requestid': '45222c46-ecb4-40e1-b43b-e07e1dcd5813', 'x-amzn-bedrock-invocation-latency': '72', 'x-amzn-bedrock-input-token-count': '16'}, 'RetryAttempts': 0}
  ragas_llm = LangchainLLMWrapper(llm)
  ragas_embeddings = LangchainEmbeddingsWrapper(embeddings)


üë®‚Äç‚öñÔ∏è Calculating Final Scores...


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]2025-12-06 16:36:34,386 - INFO - Using Bedrock Invoke API to generate response
2025-12-06 16:36:53,511 - INFO - Using Bedrock Invoke API to generate response
2025-12-06 16:37:13,642 - ERROR - Exception raised in Job[0]: LLMDidNotFinishException(The LLM generation was not completed. Please increase the max_tokens and try again.)
Evaluating:  11%|‚ñà         | 1/9 [00:39<05:16, 39.51s/it]2025-12-06 16:37:13,691 - INFO - Using Bedrock Invoke API to generate response
2025-12-06 16:37:13,696 - INFO - Using Bedrock Invoke API to generate response
2025-12-06 16:37:13,701 - INFO - Using Bedrock Invoke API to generate response
2025-12-06 16:37:42,161 - INFO - Successfully invoked model amazon.titan-embed-text-v2:0. ResponseMetadata: {'RequestId': '05eef54a-6587-4f86-a5e2-7e66d57a2745', 'HTTPStatusCode': 200, 'HTTPHeaders': {'date': 'Sat, 06 Dec 2025 11:07:41 GMT', 'content-type': 'application/json', 'content-length': '43375', 'connection': 'keep-


üèÜ OFFICIAL RAGAS SCORECARD
{'faithfulness': 1.0000, 'answer_relevancy': 0.9556, 'context_precision': 1.0000}
