In [1]:
import boto3
import os
import json
from dotenv import load_dotenv

In [None]:
# Load settings from the project's .env file (if any) into the process environment
# before any of them are read below.
load_dotenv()

# --- Configuration (all values come from environment variables) ---
# Knowledge base to query; None when the variable is unset.
KNOWLEDGE_BASE_ID = os.environ.get("BEDROCK_KNOWLEDGE_BASE_ID")
# Model used for answer generation; None when the variable is unset.
MODEL_ID = os.environ.get("BEDROCK_MODEL_ID_TITAN_G1_PREMIER")
# Region for Bedrock calls, falling back to us-east-1.
AWS_REGION = os.environ.get("AWS_DEFAULT_REGION", "us-east-1")
# Named AWS profile for the boto3 session; None when the variable is unset.
AWS_PROFILE = os.environ.get("AWS_PROFILE")
print(AWS_PROFILE)

In [12]:
# Create the boto3 session that the Bedrock client in the cell below is built from.
# AWS_PROFILE is None when the AWS_PROFILE environment variable is unset.
# NOTE(review): presumably boto3 then falls back to its default credential chain — confirm.
session = boto3.Session(profile_name=AWS_PROFILE)

In [None]:
# Disabled debugging aid: the snippet is wrapped in a string literal, so none of it runs.
# WARNING: if re-enabled, it prints the access key, secret key and session token into
# the cell output. Clear that output before saving or sharing the notebook.
"""creds = session.get_credentials().get_frozen_credentials()
print("Access Key:", creds.access_key)
print("Secret Key:", creds.secret_key)
print("Session Token:", creds.token)
print("Profile Used:", session.profile_name or "default")"""

In [14]:
# Bedrock Agents runtime client used for knowledge-base queries; it is created from
# the shared session and pinned to the configured region.
bedrock_agent_runtime = session.client("bedrock-agent-runtime", region_name=AWS_REGION)

In [15]:
def load_prompt_template(filepath):
    """
    Read a prompt template file and return its contents as text.

    The file is decoded as UTF-8 explicitly, so the result does not depend on
    the platform's default encoding.

    NOTE: a function with this same name is defined again in a later cell;
    once that cell runs, the later definition replaces this one.

    Args:
        filepath: Path of the prompt template file to read.

    Returns:
        The file contents as a string, or None when the file does not exist
        or is not valid UTF-8 (an error message is printed in both cases).
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            return f.read()
    except FileNotFoundError:
        print(f"Error: Prompt file not found at {filepath}")
    except UnicodeDecodeError:
        print(f"Error: Failed decoding {filepath} as UTF-8")
    # A None result makes query_knowledge_base skip the custom prompt template.
    return None

In [16]:
def _model_id_to_arn(model_id: str, region: str = AWS_REGION) -> str:
    """
    Build the foundation-model ARN string for a Bedrock model id.

    Args:
        model_id: Bedrock model identifier, e.g. "amazon.titan-text-premier-v1:0".
        region: AWS region placed in the ARN. The default is the value AWS_REGION
            had when this function was defined.

    Returns:
        A string of the form "arn:aws:bedrock:<region>::foundation-model/<model_id>".
    """
    arn_template = "arn:aws:bedrock:{}::foundation-model/{}"
    return arn_template.format(region, model_id)

# Build the ARN only when a model id is configured. Formatting a missing (None) id
# would produce the truthy string ".../foundation-model/None", which the
# `if not MODEL_ARN` checks later in the notebook would not catch.
MODEL_ARN = _model_id_to_arn(MODEL_ID, AWS_REGION) if MODEL_ID else None
print(MODEL_ARN)

arn:aws:bedrock:us-east-1::foundation-model/amazon.titan-text-premier-v1:0


In [17]:
def query_knowledge_base(query_text, prompt_template_text=None, number_of_results=5):
    """
    Ask the Bedrock knowledge base a question via retrieve_and_generate.

    Relies on the module-level MODEL_ARN, KNOWLEDGE_BASE_ID and
    bedrock_agent_runtime client.

    Args:
        query_text: The question, sent as the request's input text.
        prompt_template_text: Optional custom generation prompt. When falsy
            (None or empty) no generationConfiguration is sent.
        number_of_results: Value sent as numberOfResults in the vector search
            configuration (default 5).

    Returns:
        The response returned by retrieve_and_generate, or None when the
        configuration is incomplete or the call raises (an error message is
        printed in either case).
    """
    if not MODEL_ARN:
        print("Error: Model ARN is not set. Please check your .env file and variable name (BEDROCK_MODEL_ID_TITAN_G1_PREMIER).")
        return None
    if not KNOWLEDGE_BASE_ID:
        print("Error: Knowledge Base ID is not set. Please check your .env file and variable name (BEDROCK_KNOWLEDGE_BASE_ID).")
        return None

    # knowledgeBaseId and modelArn both live inside knowledgeBaseConfiguration;
    # neither is a top-level key of the request.
    knowledge_base_config = {
        "knowledgeBaseId": KNOWLEDGE_BASE_ID,
        "modelArn": MODEL_ARN,
        "retrievalConfiguration": {
            "vectorSearchConfiguration": {
                "numberOfResults": number_of_results
            }
        },
    }

    if prompt_template_text:
        knowledge_base_config["generationConfiguration"] = {
            "promptTemplate": {
                "textPromptTemplate": prompt_template_text
            }
        }
        # Inference parameters can be added next to the prompt template
        # (shape per the RetrieveAndGenerate API reference):
        # knowledge_base_config["generationConfiguration"]["inferenceConfig"] = {
        #     "textInferenceConfig": {
        #         "maxTokens": 1024,
        #         "stopSequences": [],
        #         "temperature": 0.7,
        #         "topP": 0.9
        #     }
        # }

    payload_for_api = {
        "input": {
            "text": query_text
        },
        "retrieveAndGenerateConfiguration": {
            "type": "KNOWLEDGE_BASE",
            "knowledgeBaseConfiguration": knowledge_base_config,
        },
        # To continue an existing conversation, add the session id as a
        # top-level key (per the RetrieveAndGenerate API reference):
        # "sessionId": "your-session-id",
    }

    #print(f"Sending payload to Bedrock: {json.dumps(payload_for_api, indent=2)}")

    try:
        # The payload keys map one-to-one onto retrieve_and_generate keyword arguments.
        return bedrock_agent_runtime.retrieve_and_generate(**payload_for_api)
    except Exception as e:
        # Deliberately broad: report the failure (for example expired credentials)
        # and let the notebook continue with a None response.
        print(f"Error during Bedrock retrieve_and_generate: {e}")
        return None


In [28]:
def load_prompt_template(filepath):
    """
    Return the UTF-8 text of the prompt template stored at ``filepath``.

    This definition replaces the same-named function from an earlier cell.

    Args:
        filepath: Location of the prompt template file.

    Returns:
        The file's text, or None when the file is missing or cannot be decoded
        as UTF-8. Both failures print an error message instead of raising.
    """
    template_text = None
    try:
        with open(filepath, 'r', encoding='utf-8') as prompt_file:
            template_text = prompt_file.read()
    except FileNotFoundError:
        print(f"Error: Prompt file not found at {filepath}")
    except UnicodeDecodeError:
        print(f"Error: Failed decoding {filepath} as UTF-8")
    return template_text

# Load the custom generation prompt and echo it so the active template is visible
# in the cell output. generation_prompt is None when the file could not be read.
custom_prompt_filepath = '../prompts/prompt2.txt'
generation_prompt = load_prompt_template(custom_prompt_filepath)
print(generation_prompt)


###  SYSTEM  ###############################################################
You are **PittGPT**, the University of Pittsburgh’s question-answering assistant.
Your answers MUST follow these rules:

1. **Use ONLY the materials provided in <DOCUMENTS>.**  
   • Do not invent new facts.  
   • If the documents cannot answer, say so briefly and suggest where the user might look.

2. **Cite every factual claim** with the Reddit author’s handle and a footnote number, e.g.  
   *Office hours boosted my grade* — u/ListDisastrous549 [1]

3. **List the source links at the very end** under the heading  
   **“Original Reddit threads for verification”** using the same footnote numbers.

4. **Freshness & reliability**  
   • When a post is ≥ 3 years old OR the topic is likely to have changed (policies, tuition, etc.), add  
   “*(Info from YYYY; may have changed.)*”    
   • If sources conflict, say which view is stronger and why.

5. **Tone** — helpful, Avoid jargon unless the user is technical.



In [23]:
def print_generated_response(bedrock_response: dict) -> None:
    """
    Show the answer text from a retrieve_and_generate response.

    Args:
        bedrock_response: Response dict, or None/empty when the call failed.

    Prints a notice instead of the answer when the response is empty or
    lacks the ``output`` -> ``text`` entry.
    """
    if not bedrock_response:
        print("No response returned from Bedrock.")
        return

    try:
        answer = bedrock_response["output"]["text"]
    except KeyError:
        print("Expected key 'output.text' not found in Bedrock response.")
    else:
        # Header and answer go out on separate lines, header first.
        print("\n--- Generated Response ---", answer, sep="\n")


def print_citations(bedrock_response: dict, max_snippet_chars: int = 300) -> None:
    """
    Print the citation information from a retrieve_and_generate response.

    For every reference in every citation group this prints, when present,
    a content snippet, the S3 URI and the metadata.

    Args:
        bedrock_response: Response dict, or None/empty when the call failed.
        max_snippet_chars: Maximum number of snippet characters to print per
            reference (default 300). "..." is appended only when the snippet
            was actually cut short.
    """
    if not bedrock_response:
        print("No response returned from Bedrock.")
        return

    citations = bedrock_response.get("citations") or []
    if not citations:
        print("No citations found or citations format not as expected.")
        return

    print("\n--- Retrieved Citations (Sources) ---")
    for i, citation_group in enumerate(citations, start=1):
        print(f"\nCitation Group {i}:")
        refs = citation_group.get("retrievedReferences") or []
        if not refs:
            print("  No retrieved references in this citation group.")
            continue

        for j, ref in enumerate(refs, start=1):
            print(f"  Reference {j}:")

            # `or {}` also covers keys that are present but set to None.
            snippet = (ref.get("content") or {}).get("text") or ""
            if snippet:
                ellipsis = "..." if len(snippet) > max_snippet_chars else ""
                print(f"    Content Snippet: {snippet[:max_snippet_chars]}{ellipsis}")

            location = ref.get("location") or {}
            s3_uri = (location.get("s3Location") or {}).get("uri")
            if s3_uri:
                print(f"    S3 Location: {s3_uri}")

            metadata = ref.get("metadata")
            if metadata:
                print(f"    Metadata: {metadata}")

In [29]:
if __name__ == "__main__":
    # Stop early with a pointer to the relevant environment variable when the
    # configuration is incomplete.
    if not MODEL_ARN:
        print("Execution failed: MODEL_ARN not set. Check .env and environment variable name BEDROCK_MODEL_ID_TITAN_G1_PREMIER.")
    elif not KNOWLEDGE_BASE_ID:
        print("Execution failed: KNOWLEDGE_BASE_ID not set. Check .env and environment variable name.")
    else:
        # Sample question used to exercise the knowledge base.
        user_query = (
            "I’m retaking Chem 1 over the summer and enrolled in her class because "
            "I couldn’t enroll in the first summer session. I’m concerned by her 1.9 "
            "on RateMyProfessor, especially because I did badly in the class the first time. "
            "Has anyone taken her class, and is getting an A in her class doable?"
        )

        # If generation_prompt is None, query_knowledge_base sends the request
        # without a custom prompt template.
        bedrock_response = query_knowledge_base(
            user_query,
            prompt_template_text=generation_prompt
        )

        # Both printers accept a None response and report it instead of raising.
        print_generated_response(bedrock_response)
        print_citations(bedrock_response)


Error during Bedrock retrieve_and_generate: An error occurred (ExpiredTokenException) when calling the RetrieveAndGenerate operation: The security token included in the request is expired
No response returned from Bedrock.
No response returned from Bedrock.
