In [None]:
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import BedrockEmbeddings
from langchain.vectorstores import FAISS
from langchain.indexes import VectorstoreIndexCreator
import boto3
from langchain_community.llms.bedrock import Bedrock

## Load the document into memory

In [None]:
# Build a PDF loader for the India leave-policy document and load it.
# A local file name (e.g. "e23076_uber-ars.pdf") was used here previously
# in place of the URL.
data_load = PyPDFLoader(
    "https://www.upl-ltd.com/images/people/downloads/Leave-Policy-India.pdf"
)
data_test = data_load.load()

# Show how many items the loader returned.
len(data_test)


In [None]:
# Display the third loaded item (index 2) as a quick sanity check of the load.
data_test[2]

## Transform the data

In [None]:
# Text splitter reused by the index builder further down. Separators are
# listed from coarsest (blank line) to finest (empty string).
data_split = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=10,
    separators=["\n\n", "\n", " ", ""],
)

In [None]:
# Try the splitter on a short sample paragraph to see what the chunks look like.
data_sample = (
    "LangChain is a framework designed to simplify the creation of applications "
    "using large language models. As a language model integration framework, "
    "LangChain's use-cases largely overlap with those of language models in general, "
    "including document analysis and summarization, chatbots, and code analysis"
)
data_split_test = data_split.split_text(data_sample)

# Display the resulting chunks.
data_split_test

## Embedding 

In [None]:
# AWS credentials are read from the environment instead of being typed into
# the notebook, so they never end up in a saved or shared copy of this file.
# Export AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY before starting the kernel.
# If neither is set, both values are None and boto3 falls back to its default
# credential chain (shared credentials file, instance role, ...).
ACCESS_KEY = os.environ.get("AWS_ACCESS_KEY_ID")
SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")

In [None]:
# Bedrock runtime client shared by the embedding model and the LLM below.
# The region is pinned on the client itself: a ready-made client decides which
# endpoint is called, so without it the notebook depends on whatever default
# region the local AWS configuration provides (and fails if there is none).
# "us-east-1" matches the region named for the embeddings.
bedrock_client = boto3.client(
    "bedrock-runtime",
    region_name="us-east-1",
    aws_access_key_id=ACCESS_KEY,
    aws_secret_access_key=SECRET_ACCESS_KEY,
)

In [None]:
# Embedding model handed to the index builder: Amazon Titan Text Embeddings v2,
# called through the Bedrock runtime client created above.
data_embedding = BedrockEmbeddings(
    model_id="amazon.titan-embed-text-v2:0",
    region_name="us-east-1",
    client=bedrock_client,
)

In [None]:
# Index builder configured with the splitter (`data_split`), the embedding
# model (`data_embedding`) and FAISS as the vector store class.
data_index = VectorstoreIndexCreator(
    vectorstore_cls=FAISS,
    embedding=data_embedding,
    text_splitter=data_split,
)

In [None]:
# Build the index from the policy PDF loader and keep it for querying.
db_index = data_index.from_loaders(loaders=[data_load])

In [None]:
# Llama 3 70B Instruct on Bedrock, used to answer questions over the index.
# Meta models take `max_gen_len` (at most 2048 tokens) as the output-length
# parameter. `max_tokens_to_sample` is an Anthropic-specific key; sent to a
# Llama model it makes Bedrock reject the request as invalid.
llm = Bedrock(
    client=bedrock_client,
    model_id="meta.llama3-70b-instruct-v1:0",
    model_kwargs={
        "temperature": 0.1,
        "top_p": 0.9,
        "max_gen_len": 2048,
    },
)

In [None]:
# Natural-language question passed to the index query below.
question = "can you give me details about sick leave policy in the company ?"

In [None]:
# Query the index with the question, passing the Bedrock LLM, and store the result.
hr_rag_query = db_index.query(
    llm=llm,
    question=question,
)