In [1]:
import os
from dotenv import load_dotenv
import boto3

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_aws import ChatBedrock, BedrockEmbeddings
from langchain_community.vectorstores import FAISS

from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import (
    RunnablePassthrough,
    RunnableParallel,
    RunnableLambda
)


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
load_dotenv()
print("‚úÖ Environment loaded")


‚úÖ Environment loaded


In [3]:
boto3_session = boto3.Session(
    aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
    aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
    region_name=os.getenv("AWS_DEFAULT_REGION", "us-east-1")
)

bedrock_client = boto3_session.client(
    "bedrock-runtime",
    region_name="us-east-1"
)

print(f"Connecting to AWS Bedrock in region: {os.getenv('AWS_DEFAULT_REGION')}")


Connecting to AWS Bedrock in region: None


In [4]:
llm = ChatBedrock(
    model_id="us.anthropic.claude-3-5-haiku-20241022-v1:0",
    client=bedrock_client,
    model_kwargs={
        "temperature": 0.1,
        "max_tokens": 512
    }
)

embeddings = BedrockEmbeddings(
    model_id="amazon.titan-embed-text-v2:0",
    client=bedrock_client
)

print("‚úÖ Bedrock LLM & Embeddings initialized")


‚úÖ Bedrock LLM & Embeddings initialized


In [5]:
data = """
Artificial Intelligence (AI) is transforming the world. Machine Learning is a subset of AI 
that enables systems to learn from data. Deep Learning uses neural networks with multiple layers 
to process complex patterns. Natural Language Processing (NLP) helps machines understand human language.
Large Language Models like GPT and Llama are revolutionizing how we interact with AI systems.
Retrieval-Augmented Generation (RAG) combines retrieval and generation for better responses.
Vector databases store embeddings for efficient similarity search in AI applications.
"""

In [6]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=50,
    separators=["\n\n", "\n", ".", " ", ""]
)

chunks = text_splitter.split_text(data)

print(f"‚úÖ Data split into {len(chunks)} chunks")


‚úÖ Data split into 3 chunks


In [7]:
vectorstore = FAISS.from_texts(
    texts=chunks,
    embedding=embeddings
)

print("‚úÖ Documents embedded and stored in FAISS")


‚úÖ Documents embedded and stored in FAISS


In [8]:
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3}
)


In [9]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

format_docs_runnable = RunnableLambda(format_docs)


In [10]:
prompt = PromptTemplate(
    template="""
You are an expert Insurance Assistant. Use the following pieces of retrieved context to answer the question.
If the answer is not in the context, just say that you don't know. Do not try to make up an answer.

CONTEXT:
{context}

QUESTION:
{question}

ANSWER:
""",
    input_variables=["context", "question"]
)


In [11]:
rag_chain = (
    RunnableParallel(
        context=retriever | format_docs_runnable,
        question=RunnablePassthrough()
    )
    | prompt
    | llm
    | StrOutputParser()
)

print("‚úÖ RAG chain built using LCEL + FAISS")


‚úÖ RAG chain built using LCEL + FAISS


In [12]:
test_question = "What is RAG and how does it work?"

print(f"\nüîç Question: {test_question}\n")
response = rag_chain.invoke(test_question)
print(f"üí° Answer:\n{response}")



üîç Question: What is RAG and how does it work?

üí° Answer:
Based on the provided context, RAG (Retrieval-Augmented Generation) is a technique that combines retrieval and generation to improve AI responses. The context indicates that RAG involves using retrieval mechanisms, likely with vector databases that store embeddings, to enhance the generation of responses. 

However, the context does not provide a detailed explanation of exactly how RAG works. While it mentions some related concepts like vector databases and AI technologies, the specific mechanics of RAG are not fully elaborated in this context.

So my most accurate response is: RAG combines retrieval and generation techniques to create better AI responses, but I don't have enough detailed information from the context to explain precisely how it works.


In [None]:
#‚ö†Ô∏è Optional (VERY IMPORTANT)

#If you want persistence (store FAISS index on disk):

vectorstore.save_local("faiss_index")


#Load later:

vectorstore = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True
)