In [1]:
import json
import boto3
import os
from dotenv import load_dotenv
import time
from typing import List, Dict, Any
import pandas as pd
from IPython.display import display, HTML, Image

# Load variables defined in a .env file into the process environment.
# The AWS credential lookups done later via os.environ.get(...) rely on this call.
load_dotenv()

True

In [2]:
# Configure your settings here.
# These module-level constants are read by the client setup and by the helper
# functions defined further down; edit them to point at a different deployment.
KB_ID = "PXQS78QKBV"  # Knowledge base ID passed as knowledgeBaseId to bedrock_agent.retrieve()
# NOTE(review): the "us." prefix looks like a cross-Region inference profile ID
# rather than a bare model ID - confirm against the account's Bedrock setup.
LLM_MODEL_ID = "us.meta.llama3-2-11b-instruct-v1:0"  # Model invoked by generate_llm_response()
EMBEDDING_MODEL_ID = "amazon.titan-embed-text-v2:0"  # Model invoked by generate_embedding()
REGION = "us-east-1"  # AWS region used for both Bedrock clients

In [6]:
# Initialize Bedrock clients
def _make_bedrock_client(service_name):
    """Create a boto3 client for `service_name` using the notebook's region and credentials."""
    # os.environ.get() yields None when a variable is unset.
    # NOTE(review): presumably boto3 then falls back to its default credential
    # chain - confirm this is the intended behaviour.
    return boto3.client(
        service_name,
        region_name=REGION,
        aws_access_key_id=os.environ.get('Home_AWS_ACCESS_KEY_ID'),
        aws_secret_access_key=os.environ.get('Home_AWS_SECRET_ACCESS_KEY'),
    )


# Runtime client: model invocation (embeddings and text generation).
bedrock_runtime = _make_bedrock_client('bedrock-runtime')
# Agent-runtime client: Knowledge Base retrieval.
bedrock_agent = _make_bedrock_client('bedrock-agent-runtime')

# Echo the active configuration so the cell output records what was used.
print(
    f"Initialized Bedrock clients with Knowledge Base ID: {KB_ID}",
    f"LLM Model: {LLM_MODEL_ID}",
    f"Embedding Model: {EMBEDDING_MODEL_ID}",
    sep="\n",
)

Initialized Bedrock clients with Knowledge Base ID: PXQS78QKBV
LLM Model: us.meta.llama3-2-11b-instruct-v1:0
Embedding Model: amazon.titan-embed-text-v2:0


In [31]:
def generate_embedding(text) -> list:
    """Generate an embedding for `text` with the Bedrock model named by EMBEDDING_MODEL_ID.

    Args:
        text: The text to embed. Falsy values (``""``, ``None``) short-circuit
            without calling the service.

    Returns:
        The value of the ``embedding`` field of the model response. An empty
        list is returned when `text` is falsy, when the response carries no
        ``embedding`` field, or when the call fails for any reason (the error
        is printed, not raised).
    """
    if not text:
        return []

    try:
        # Prepare the request body for the model
        request_body = json.dumps({"inputText": text})

        # Call the Bedrock runtime
        response = bedrock_runtime.invoke_model(
            modelId=EMBEDDING_MODEL_ID,
            body=request_body
        )

        # The body is a stream; read it once and decode the JSON payload
        response_body = json.loads(response["body"].read())

        # Normalise a missing/None field to [] so every path returns a list
        # and callers never have to special-case None.
        return response_body.get('embedding') or []
    except Exception as e:
        # Best-effort by design: report the problem and hand back an empty vector
        print(f"Error generating embedding: {str(e)}")
        return []

In [32]:
def retrieve_from_kb(query, top_k=6, score_threshold=0.6):
    """Query the Knowledge Base and keep only the hits that clear a score cut-off.

    Args:
        query: Text sent as the retrieval query.
        top_k: Number of results requested from the vector search.
        score_threshold: Hits scoring below this value are discarded.

    Returns:
        ``{"results": [...]}`` where each entry has ``content``, ``metadata``
        and ``score`` keys, ordered from highest to lowest score. If anything
        raises, the error and traceback are printed and ``{"results": []}``
        is returned.
    """
    def _to_record(hit):
        # Flatten one raw hit; missing pieces fall back to "", {} and 0.
        return {
            "content": hit.get("content", {}).get("text", ""),
            "metadata": hit.get("metadata", {}),
            "score": hit.get("score", 0),
        }

    try:
        print("Retrieving from Knowledge Base...")
        search_config = {"vectorSearchConfiguration": {"numberOfResults": top_k}}
        kb_response = bedrock_agent.retrieve(
            knowledgeBaseId=KB_ID,
            retrievalQuery={"text": query},
            retrievalConfiguration=search_config,
        )

        hits = kb_response.get("retrievalResults", [])
        print(f"Retrieved {len(hits)} results in total")

        # Debug aid: show the metadata layout of the first hit
        if hits:
            first_metadata = json.dumps(hits[0].get('metadata', {}), indent=2)
            print(f"First result metadata sample: {first_metadata}")

        # map() is lazy, so each hit is flattened and then tested in turn
        kept = [
            record
            for record in map(_to_record, hits)
            if not record["score"] < score_threshold
        ]
        # Best match first
        kept.sort(key=lambda record: record["score"], reverse=True)

        print(f"Filtered to {len(kept)} results above threshold {score_threshold}")
        return {"results": kept}
    except Exception as e:
        print(f"Error retrieving from Knowledge Base: {str(e)}")
        import traceback
        traceback.print_exc()
        return {"results": []}

In [54]:
import re
def _build_context(retrieved_data):
    """Render the retrieved passages as the plain-text context section of the prompt."""
    parts = ["I'm providing you with relevant information to answer the query.\n\n"]
    results = retrieved_data["results"]
    if results:
        parts.append("RELEVANT INFORMATION:\n")
        for index, result in enumerate(results, 1):
            parts.append(f"Source {index}:\n")
            # The title line is emitted only when the metadata carries one
            if "title" in result["metadata"]:
                parts.append(f"Title: {result['metadata']['title']}\n")
            parts.append(f"Content: {result['content']}\n\n")
    return "".join(parts)


def _clean_llm_response(raw_text, max_sentences=4):
    """Strip table residue, drop repeated sentences and cap the sentence count."""
    # Anything after the first pipe is treated as table output and discarded,
    # then horizontal-rule dashes are removed.
    text = raw_text.split('|')[0].replace('---', '').strip()

    # Split only where terminal punctuation is followed by whitespace, so that
    # periods inside tokens ("file.txt", "v1.2") do not cut a sentence in two
    # and each sentence keeps its own ".", "!" or "?".
    # Known limitation: abbreviations such as "e.g. " still split.
    fragments = re.split(r'(?<=[.!?])\s+', text)

    seen = set()
    unique_sentences = []
    for fragment in fragments:
        sentence = fragment.strip()
        # Compare without trailing punctuation so "Yes." and "Yes!" count as repeats
        key = sentence.rstrip('.!?').strip()
        if not key or key in seen:
            continue
        seen.add(key)
        unique_sentences.append(sentence)

    cleaned = ' '.join(unique_sentences[:max_sentences])
    # Only a final fragment (e.g. cut off by the token limit) can lack punctuation
    if cleaned and not cleaned.endswith(('.', '!', '?')):
        cleaned += '.'
    return cleaned


def generate_llm_response(query, retrieved_data):
    """Generate a short answer to `query` with the Bedrock LLM, grounded in retrieved data.

    Args:
        query: The user's question, inserted verbatim into the prompt.
        retrieved_data: Dict with a ``"results"`` list; each entry must provide
            ``"content"`` and ``"metadata"`` (an optional ``"title"`` in the
            metadata is included in the prompt).

    Returns:
        The cleaned model output: table residue removed, repeated sentences
        dropped, at most four sentences. If anything raises, the error is
        printed and an apology string containing the error text is returned.
    """
    try:
        # Create context from retrieved data
        context = _build_context(retrieved_data)

        # Create prompt for LLM with very specific instructions.
        # NOTE(review): the prompt is sent as plain text. Llama 3 instruct models
        # are documented with a chat template (header/eot special tokens) -
        # confirm whether wrapping the prompt would reduce the repetition that
        # the clean-up below compensates for.
        prompt = f"""
Context:
{context}

User Query: {query}

IMPORTANT INSTRUCTIONS FOR RESPONSE:
1. Answer the query in 2-4 sentences maximum.
2. Include only essential information directly related to the query.
3. Do not repeat any information.
4. Do not use any formatting like tables, bullets, or separator lines.
5. Do not include any disclaimers, follow-ups, or offers of additional help.
6. Provide just the direct answer to the query and nothing more.

Answer:
"""
        print("Generating response with LLM...")
        # Call Bedrock LLM model with lower max_gen_len
        request_body = json.dumps({
            "prompt": prompt,
            "max_gen_len": 128,  # Very limited to prevent repetition
            "temperature": 0.01,  # Nearly deterministic
            "top_p": 0.1  # Very focused sampling
        })

        # Invoke the model
        response = bedrock_runtime.invoke_model(
            modelId=LLM_MODEL_ID,
            body=request_body
        )

        # Parse the response
        response_body = json.loads(response["body"].read())
        llm_response = response_body.get('generation') or ''

        return _clean_llm_response(llm_response)
    except Exception as e:
        print(f"Error generating LLM response: {str(e)}")
        return f"I'm sorry, I couldn't generate a response due to an error: {str(e)}"

In [55]:
def answer_query(query):
    """Run the full RAG pipeline for one question: retrieve first, then generate.

    Returns a dict holding the original ``query``, the generated ``response``
    and the ``retrieved_data`` the response was based on, so the caller can
    inspect what the answer was grounded in.
    """
    print(f"Processing query: {query}")

    # Retrieval stage: fetch supporting passages from the Knowledge Base
    print("Retrieving relevant information...")
    kb_hits = retrieve_from_kb(query)

    # Generation stage: let the LLM answer using those passages
    print("Generating response...")
    answer = generate_llm_response(query, kb_hits)

    # Bundle the answer with its evidence for transparency
    return dict(query=query, response=answer, retrieved_data=kb_hits)

In [56]:
def display_results(result):
    """Print the query and its generated response, framed by 80-character rules.

    `result` must provide the ``"query"`` and ``"response"`` keys.
    """
    rule = "=" * 80

    # A blank line separates the report from whatever was printed before it
    print(f"\n{rule}")
    print("QUERY: {}".format(result['query']))
    print(rule)
    print("RESPONSE:\n{}".format(result['response']))
    print(rule)

In [57]:
# Execute a query and measure time
query = "How to find RH850 device file?"

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; unlike time.time() it is unaffected by system clock adjustments.
# The timer starts after the query is set up so only the pipeline is measured.
start_time = time.perf_counter()
result = answer_query(query)
end_time = time.perf_counter()

# Display results
display_results(result)
print(f"\nTotal processing time: {end_time - start_time:.2f} seconds")

Processing query: How to find RH850 device file?
Retrieving relevant information...
Retrieving from Knowledge Base...
Retrieved 6 results in total
First result metadata sample: {
  "x-amz-bedrock-kb-source-uri": "s3://renesas-rag/kb-data/bedrock_kb_format.json",
  "x-amz-bedrock-kb-chunk-id": "1%3A0%3AweNke5UBsA7k3aYZmfcA",
  "x-amz-bedrock-kb-data-source-id": "VM0LEKRLQX"
}
Filtered to 3 results above threshold 0.6
Generating response...
Generating response with LLM...

QUERY: How to find RH850 device file?
RESPONSE:
The RH850 device file can be found in the install directory, specifically in the path C:\Program Files (x86)\Renesas Electronics\CS\Device\RH850\Devicefile. It is not necessary to select the file manually, as it is done automatically if a device is selected in the CS Device Selection dialogue. If the device is not listed, check that you are using the latest version of CS. The device file is a binary file that describes the device and is used to create header-files for a C