In [5]:
import os
from dotenv import load_dotenv
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_aws import ChatBedrock, BedrockEmbeddings
from langchain_pinecone import Pinecone as LangchainPinecone
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from pinecone import Pinecone, ServerlessSpec
import boto3

In [2]:
load_dotenv()

print("sucess")

sucess


In [9]:
boto3_session = boto3.Session(
    aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
    aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
    region_name=os.getenv("AWS_DEFAULT_REGION", "us-east-1")
)

bedrock_client = boto3_session.client("bedrock-runtime", region_name="us-east-1")

print(f"Connecting to AWS Bedrock in region: {os.getenv('AWS_DEFAULT_REGION')}...")

Connecting to AWS Bedrock in region: None...


In [11]:
llm = ChatBedrock(
    model_id="us.anthropic.claude-3-5-haiku-20241022-v1:0", 
    client=bedrock_client, 
    model_kwargs={"temperature": 0.1, "max_tokens": 512} 
)

embeddings = BedrockEmbeddings(
    model_id="amazon.titan-embed-text-v2:0",
    client=bedrock_client
)

print("‚úÖ AWS Bedrock models initialized!")

‚úÖ AWS Bedrock models initialized!


In [12]:
data = """
Artificial Intelligence (AI) is transforming the world. Machine Learning is a subset of AI 
that enables systems to learn from data. Deep Learning uses neural networks with multiple layers 
to process complex patterns. Natural Language Processing (NLP) helps machines understand human language.
Large Language Models like GPT and Llama are revolutionizing how we interact with AI systems.
Retrieval-Augmented Generation (RAG) combines retrieval and generation for better responses.
Vector databases store embeddings for efficient similarity search in AI applications.
"""

In [19]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 200,
    chunk_overlap = 50,
    separators=["\n\n", "\n","."," ",""]
)

chunks = text_splitter.split_text(data)
print(f"Data split into {len(chunks)} chunks ")
for i, chunk in enumerate(chunks):
    print(f"--- chunk {i+1} --- \n\n {chunk} \n")

Data split into 3 chunks 
--- chunk 1 --- 

 Artificial Intelligence (AI) is transforming the world. Machine Learning is a subset of AI 
that enables systems to learn from data. Deep Learning uses neural networks with multiple layers 

--- chunk 2 --- 

 to process complex patterns. Natural Language Processing (NLP) helps machines understand human language.
Large Language Models like GPT and Llama are revolutionizing how we interact with AI systems. 

--- chunk 3 --- 

 Retrieval-Augmented Generation (RAG) combines retrieval and generation for better responses.
Vector databases store embeddings for efficient similarity search in AI applications. 



In [29]:
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

index_name = "pash-index-2"


if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=1024,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws",region="us-east-1")
        
    )
    print("pinecone index create successfully")
else:
    print("existing pinecone available")    

pinecone index create successfully


In [30]:
vectorstore = LangchainPinecone.from_texts(
    texts=chunks,
    embedding=embeddings,
    index_name=index_name
)

print("document embedded and store in pinecone")

document embedded and store in pinecone


In [None]:
## this is for FAISS vector store

vectorstore = FAISS.from_texts(
    texts=chunks,
    embedding=embeddings
)

print("‚úÖ Documents embedded and stored in FAISS")

In [38]:
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3}
)


In [39]:
from langchain_core.runnables import RunnableLambda

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

format_docs_runnable = RunnableLambda(format_docs)


In [40]:
from langchain_core.prompts import PromptTemplate

prompt = PromptTemplate(
    template="""
You are an expert Insurance Assistant. Use the following pieces of retrieved context to answer the question.
If the answer is not in the context, just say that you don't know. Do not try to make up an answer.

CONTEXT:
{context}

QUESTION:
{question}

ANSWER:
""",
    input_variables=["context", "question"]
)


In [41]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

rag_chain = (
    RunnableParallel(
        context=retriever | format_docs_runnable,
        question=RunnablePassthrough()
    )
    | prompt          # ‚úÖ PromptTemplate, NOT string
    | llm
    | StrOutputParser()
)

print("‚úÖ RAG chain built using LCEL!")


‚úÖ RAG chain built using LCEL!


In [42]:
test_question = "What is RAG and how does it work?"

print(f"\nüîç Question: {test_question}\n")
response = rag_chain.invoke(test_question)
print(f"üí° Answer:\n{response}")


üîç Question: What is RAG and how does it work?

üí° Answer:
Based on the provided context, RAG (Retrieval-Augmented Generation) is a technique that combines retrieval and generation to improve AI responses. The context indicates that RAG helps in generating more accurate and contextually relevant answers by first retrieving relevant information before generating a response.

While the context provides a basic definition, it doesn't go into extensive detail about the specific mechanics of how RAG works. The context suggests that it involves using techniques like vector databases to store and efficiently search embeddings, which can help in retrieving relevant information quickly.

The context also mentions related AI technologies like Natural Language Processing (NLP), Large Language Models, and Machine Learning, which are likely components that support RAG's functionality, but doesn't explicitly explain RAG's full operational process.

So in summary, RAG is a method that enhances A