In [None]:
import boto3
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.llms.bedrock import Bedrock
from reportlab.pdfgen import canvas
from langchain.document_loaders import S3FileLoader
from bedrock import get_bedrock_client
# Build the client that the Bedrock LLM wrapper below is given.
# NOTE(review): get_bedrock_client comes from the local `bedrock` module;
# runtime=True presumably selects the inference (runtime) endpoint - confirm.
bedrock_client = get_bedrock_client(
    region="us-east-1",
    runtime=True,
)

# --- Discover and load every supported document in the S3 bucket ------------
s3 = boto3.client('s3')

bucket_name = 'my-s3-doc-loader'

file_names = []
allowed_formats = ['.txt', '.pdf', '.doc', '.docx']
# str.endswith accepts a tuple of suffixes, so no inner loop is needed.
allowed_suffixes = tuple(allowed_formats)

# A single list_objects_v2 call returns at most 1000 keys; the paginator
# follows continuation tokens so larger buckets are listed completely.
paginator = s3.get_paginator('list_objects_v2')
for response in paginator.paginate(Bucket=bucket_name):
    # 'Contents' is absent from the response when no objects are returned.
    for obj in response.get('Contents', []):
        key = obj['Key']

        # Keep only files with an allowed format (case-insensitive match).
        if key.lower().endswith(allowed_suffixes):
            file_names.append(key)

if not file_names:
    # Fail here with a clear message rather than later inside the splitter
    # or vector store with an unrelated-looking error.
    raise ValueError(
        f"No documents with extensions {allowed_formats} found in "
        f"bucket {bucket_name!r}"
    )

# Accumulate the documents of *all* files. Assigning loader.load() directly
# to context_content inside the loop would keep only the last file.
context_content = []
for context_key in file_names:
    loader = S3FileLoader(bucket=bucket_name, key=context_key)
    context_content.extend(loader.load())


# --- Chunk the loaded documents and index them for retrieval ----------------
# Split the documents into chunks using chunk_size=3000.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=3000,
)
context_texts = text_splitter.split_documents(context_content)

# Embed each chunk with a sentence-transformers model and store the vectors
# in a FAISS index.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
db = FAISS.from_documents(
    documents=context_texts,
    embedding=embeddings,
)

# Expose the index as a retriever: 'mmr' search type, k=5 results per query.
retriever = db.as_retriever(
    search_type='mmr',
    search_kwargs={"k": 5},
)

# --- Prompt and question-answering chain ------------------------------------
# Prompt text sent to the model; {context} and {question} are the two
# placeholders declared as input variables on the PromptTemplate below.
template = """
Human: Answer truthfully based on the given question, fetch the answer only from the given text documents
Instruction:
1.If multiple files are there, read the all the files each and every lines accurately for to generate answer
2.If there is no text found in the text document about the asked question ,"print no result found" do not print any results if answer not found,do not search the answers from outside
3.Generate answer whatever available related to the question
4.Must complete the sentence in the result fully, do not leave results incomplete format in the end.
text:{context}
question:{question}
Assistant:"""

qa_prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=template,
)

# Passed through to the chain so it uses the custom prompt above.
chain_type_kwargs = {"prompt": qa_prompt}

# Claude v2 served through the Bedrock client created earlier.
llm = Bedrock(
    client=bedrock_client,
    model_id="anthropic.claude-v2",
)

# Retrieval-backed QA chain: the retriever supplies the chunks and the LLM
# answers using qa_prompt, with chain_type "stuff".
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs=chain_type_kwargs,
    verbose=False,
)