In [3]:
import boto3
import botocore
from langchain.chains import RetrievalQA
from langchain.llms.bedrock import Bedrock
from langchain.prompts import PromptTemplate
from langchain_community.embeddings import BedrockEmbeddings
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.vectorstores.pgvector import PGVector
from langchain.text_splitter import RecursiveCharacterTextSplitter


config = botocore.config.Config(
    read_timeout=900,
    connect_timeout=900,
    retries={"max_attempts": 0}
)

bedrock_client = boto3.client(
    service_name="bedrock-runtime",
    region_name="us-east-1",
    config=config,
)

loader = DirectoryLoader(
    r"C:\Users\Lenovo\Documents\Project-vs code\Amazon Transcribe\mahendhra\man"
)
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=0)
texts = text_splitter.split_documents(docs)
embeddings = BedrockEmbeddings(model_id="cohere.embed-multilingual-v3")

CONNECTION_STRING = "postgresql+psycopg2://postgres:serverless123@database-1-instance-1.cxbpo87iqdgv.us-east-1.rds.amazonaws.com:5432/database1"

COLLECTION_NAME = "mahendhra"

db = PGVector.from_documents(
    embedding=embeddings,
    documents=texts,
    collection_name=COLLECTION_NAME,
    connection_string=CONNECTION_STRING,
    pre_delete_collection= False
)

template = """
    Generate answers truthfully based only on the given document
    1.Must identify the language of user's question
    2.Must Give the response only in identified user's language in question
    3.Analyse all the text datas and generate accurate answers

for example:
1.if the asked question is tamil language you should give the response in the tamil language only.


{context}
{question}
"""

In [None]:
retriever = db.as_retriever(search_type='similarity', search_kwargs={"k": 3})
llm = Bedrock(model_id="anthropic.claude-v2:1",client=bedrock_client,model_kwargs = {"temperature":1e-10,"max_tokens_to_sample": 20000})
qa_prompt = PromptTemplate(template=template, input_variables=["context","question"])
chain_type_kwargs = { "prompt": qa_prompt, "verbose": False }
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs=chain_type_kwargs,
    verbose=False
)

In [None]:
question="What is the quantity of oil in the Transaxel"
result = qa.run(question)
print(result)

In [7]:
import concurrent.futures
import boto3
import botocore
from langchain.chains import RetrievalQA
from langchain.llms.bedrock import Bedrock
from langchain.prompts import PromptTemplate
from langchain_community.embeddings import BedrockEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.vectorstores.pgvector import PGVector
from langchain.text_splitter import RecursiveCharacterTextSplitter


config = botocore.config.Config(
    read_timeout=900,
    connect_timeout=900,
    retries={"max_attempts": 0}
)

bedrock_client = boto3.client(
    service_name="bedrock-runtime",
    region_name="us-east-1",
    config=config,
)

CONNECTION_STRING = "postgresql+psycopg2://postgres:serverless123@database-1-instance-1.cxbpo87iqdgv.us-east-1.rds.amazonaws.com:5432/database1"

COLLECTION_NAME = "mahendhra_rise1"

loader = PyPDFLoader(
    r"C:\Users\Lenovo\Documents\Project-vs code\Amazon Transcribe\mahendhra\MAN01074RepairManualXUV700DieselATRev1.pdf"
)

template = """
    You will be given a query, Your task is to find an answer or give information about the query with respect to the document by performing a similarity search.
    Consider the following conditions,
    - If query is a topic, look for information or statements or sentences which are related to the query in the document.
    - The answer does not need to be specifically related, it can be loosely related as well.
    - If you find any statements that are directly related to the query, then explain those statements in easy or layman terms, so someone with no expertise in that field can understand.
    - If query is a question, understand the context of query and then look for similar statements or sentences in the document which also have the same context.
    - It is not necessary to return direct statements from the document as an answer. You can also return loosely related answers to the query.
    - If you cannot find any direct statements or directly relevant answers, do not return that you cannot find any direct statements. Then, you have to perform a semantic search instead of looking for exact words in the document, that is, understanding the context from the query and looking for something similar in the document.
    - It is not mandatory to look for direct statements, you can also look at statements with a similar meaning and context.
    - It is fine if you cannot find any directly related statements in the document. You can look for sentences with similar meaning and can also return loosely related answers.
    - Elaborate the answer as much as you can.
    - If the table datas are there must extract and give the accurate answers
    - If you cannot find a relevant answer, then perform a similarity search on all statements which have a similar meaning to the query, and the document.
    - If even after doing the search on similar statements, you can't find an answer, you can just say that you are not able to find any answer without saying anything else.
    - The output should just be a bullet list of points which has the summary of all points obtained from the search and nothing else. Do not say anything like you have or haven't found directly related answers.
    - Please refrain from returning the process or steps followed to obtain the answer, you can only return the answers which are closely or loosely related to the query and do not return anything else.
    - If is English: Return the answer obtained as the output.
    - Else: translate the answer obtained when performing a similarity search into and then return as output.
    - Do not return anything other than the bullet list of points.


{context}
{question}
"""

# Create a TextSplitter object
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=0)

# Create an Embeddings object
embeddings = BedrockEmbeddings(model_id="cohere.embed-multilingual-v3")

# Define a function to split and embed a document
def split_and_embed(doc):
    texts = text_splitter.split_documents([doc])
    return embeddings.embed_documents(texts)

# Create a list of documents
docs = loader.load()

# Create a ThreadPoolExecutor
with concurrent.futures.ThreadPoolExecutor() as executor:
    # Use the executor to run the split_and_embed function on all documents
    embeddings_futures = [executor.submit(split_and_embed, doc) for doc in docs]
    for doc in docs:
    doc_str = doc.page_content
    doc_str = doc_str.replace("old_text", "new_text")
    doc.page_content = doc_str

    embeddings_list = []
    # Wait for all the embeddings to be calculated
    for embeddings_future in concurrent.futures.as_completed(embeddings_futures):
        embeddings_list.append(embeddings_future.result())

        db = PGVector.from_documents(
        embedding=embeddings,
        documents=embeddings_list,
        collection_name=COLLECTION_NAME,
        connection_string=CONNECTION_STRING,
        pre_delete_collection= False
        )

AttributeError: 'Document' object has no attribute 'replace'

In [None]:
retriever = db.as_retriever(search_type='similarity', search_kwargs={"k": 3})
llm = Bedrock(model_id="anthropic.claude-v2:1",client=bedrock_client,model_kwargs = {"temperature":1e-10,"max_tokens_to_sample": 20000})
qa_prompt = PromptTemplate(template=template, input_variables=["context","question"])
chain_type_kwargs = { "prompt": qa_prompt, "verbose": False }
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs=chain_type_kwargs,
    verbose=False
)

In [None]:
question="What is the quantity of oil in the Transaxel"
result = qa.run(question)
print(result)

In [8]:
import concurrent.futures
import boto3
import botocore
from langchain.chains import RetrievalQA
from langchain.llms.bedrock import Bedrock
from langchain.prompts import PromptTemplate
from langchain_community.embeddings import BedrockEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.vectorstores.pgvector import PGVector
from langchain.text_splitter import RecursiveCharacterTextSplitter


config = botocore.config.Config(
    read_timeout=900,
    connect_timeout=900,
    retries={"max_attempts": 0}
)

bedrock_client = boto3.client(
    service_name="bedrock-runtime",
    region_name="us-east-1",
    config=config,
)

CONNECTION_STRING = "postgresql+psycopg2://postgres:serverless123@database-1-instance-1.cxbpo87iqdgv.us-east-1.rds.amazonaws.com:5432/database1"

COLLECTION_NAME = "mahendhra_rise1"

loader = PyPDFLoader(
    r"C:\Users\Lenovo\Documents\Project-vs code\Amazon Transcribe\mahendhra\MAN01074RepairManualXUV700DieselATRev1.pdf"
)

template = """
    You will be given a query, Your task is to find an answer or give information about the query with respect to the document by performing a similarity search.
    Consider the following conditions,
    - If query is a topic, look for information or statements or sentences which are related to the query in the document.
    - The answer does not need to be specifically related, it can be loosely related as well.
    - If you find any statements that are directly related to the query, then explain those statements in easy or layman terms, so someone with no expertise in that field can understand.
    - If query is a question, understand the context of query and then look for similar statements or sentences in the document which also have the same context.
    - It is not necessary to return direct statements from the document as an answer. You can also return loosely related answers to the query.
    - If you cannot find any direct statements or directly relevant answers, do not return that you cannot find any direct statements. Then, you have to perform a semantic search instead of looking for exact words in the document, that is, understanding the context from the query and looking for something similar in the document.
    - It is not mandatory to look for direct statements, you can also look at statements with a similar meaning and context.
    - It is fine if you cannot find any directly related statements in the document. You can look for sentences with similar meaning and can also return loosely related answers.
    - Elaborate the answer as much as you can.
    - If the table datas are there must extract and give the accurate answers
    - If you cannot find a relevant answer, then perform a similarity search on all statements which have a similar meaning to the query, and the document.
    - If even after doing the search on similar statements, you can't find an answer, you can just say that you are not able to find any answer without saying anything else.
    - The output should just be a bullet list of points which has the summary of all points obtained from the search and nothing else. Do not say anything like you have or haven't found directly related answers.
    - Please refrain from returning the process or steps followed to obtain the answer, you can only return the answers which are closely or loosely related to the query and do not return anything else.
    - If is English: Return the answer obtained as the output.
    - Else: translate the answer obtained when performing a similarity search into and then return as output.
    - Do not return anything other than the bullet list of points.


{context}
{question}
"""

# Create a TextSplitter object
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)

# Create an Embeddings object
embeddings = BedrockEmbeddings(model_id="cohere.embed-multilingual-v3")

# Define a function to split and embed a document
def split_and_embed(doc):
    texts = text_splitter.split_documents([doc.page_content])  # Convert Document object to string
    return embeddings.embed_documents(texts)

# Create a list of documents
docs = loader.load()

# Create a ThreadPoolExecutor
with concurrent.futures.ThreadPoolExecutor() as executor:
    # Use the executor to run the split_and_embed function on all documents
    embeddings_futures = [executor.submit(split_and_embed, doc) for doc in docs]
    embeddings_list = []
    # Wait for all the embeddings to be calculated
    for embeddings_future in concurrent.futures.as_completed(embeddings_futures):
        embeddings_list.append(embeddings_future.result())

        db = PGVector.from_documents(
            embedding=embeddings,
            documents=embeddings_list,
            collection_name=COLLECTION_NAME,
            connection_string=CONNECTION_STRING,
            pre_delete_collection=False
        )


AttributeError: 'str' object has no attribute 'page_content'