In [3]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
import os
from dotenv import load_dotenv

load_dotenv()
api_key = os.getenv("OPENAI_API_KEY_1")

# For openai key
import os
os.environ["OPENAI_API_KEY"] = api_key

In [4]:

loader = PyPDFLoader("bertolini.pdf")
documents = loader.load()

In [5]:
documents

[Document(page_content=' \nPreços para Carga Fracionada - Natureza da Carga: PROD CREMER S/A (008925)\nGrupo  de Origem: 544160 - SC-ITA (INT 1 250)CREMER S/A 82641325000118\nCREMER S.A. 82641325004377\nGrupos Destino Fr. R$ por\nt/m³ 1ADV % s/ NF2Ped. Fração\n100Kg 3Prz. Entrega4 SEC-CAT\nAC-RBR (CAP 70) 2.101,14 0,0800 8,26 15 56,75\nAC-RBR (INT 1 250) 3.075,22 0,0800 8,26 20 56,75\nAC-RBR (INT 2 700) 4.818,28 0,0800 8,26 20 56,75\nAM-MAO (CAP 70) 1.868,07 0,2000 8,26 15 56,75\nAM-MAO (INT 1 250) 4.534,02 0,2000 8,26 17 56,75\nAM-MAO (INT FL 1) 5.440,87 0,2000 8,26 28 56,75\nAM-MAO (INT FL 2) 5.894,34 0,2000 8,26 31 56,75\nAP-MCP (CAP 70) 1.896,28 0,2000 8,26 12 56,75\nAP-MCP (INT 1 250) 3.280,28 0,2000 8,26 21 56,75\nAP-MCP (INT 2 700) 4.459,59 0,2000 8,26 21 56,75\nDF-BSB (VIA GYN) 840,67 0,0800 8,26 06 21,55\nGO-GYN (CAP 70) 754,34 0,0800 8,26 06 21,55\nGO-GYN (INT 1 250) 1.051,21 0,0800 8,26 09 35,90\nGO-GYN (INT 2 700) 1.179,98 0,0800 8,26 11 35,90\nMS-CGR (CAP 70) 893,70 0,0800

In [6]:
len(documents)

6

In [7]:
text = RecursiveCharacterTextSplitter().split_documents(documents)

In [8]:
text 

[Document(page_content='Preços para Carga Fracionada - Natureza da Carga: PROD CREMER S/A (008925)\nGrupo  de Origem: 544160 - SC-ITA (INT 1 250)CREMER S/A 82641325000118\nCREMER S.A. 82641325004377\nGrupos Destino Fr. R$ por\nt/m³ 1ADV % s/ NF2Ped. Fração\n100Kg 3Prz. Entrega4 SEC-CAT\nAC-RBR (CAP 70) 2.101,14 0,0800 8,26 15 56,75\nAC-RBR (INT 1 250) 3.075,22 0,0800 8,26 20 56,75\nAC-RBR (INT 2 700) 4.818,28 0,0800 8,26 20 56,75\nAM-MAO (CAP 70) 1.868,07 0,2000 8,26 15 56,75\nAM-MAO (INT 1 250) 4.534,02 0,2000 8,26 17 56,75\nAM-MAO (INT FL 1) 5.440,87 0,2000 8,26 28 56,75\nAM-MAO (INT FL 2) 5.894,34 0,2000 8,26 31 56,75\nAP-MCP (CAP 70) 1.896,28 0,2000 8,26 12 56,75\nAP-MCP (INT 1 250) 3.280,28 0,2000 8,26 21 56,75\nAP-MCP (INT 2 700) 4.459,59 0,2000 8,26 21 56,75\nDF-BSB (VIA GYN) 840,67 0,0800 8,26 06 21,55\nGO-GYN (CAP 70) 754,34 0,0800 8,26 06 21,55\nGO-GYN (INT 1 250) 1.051,21 0,0800 8,26 09 35,90\nGO-GYN (INT 2 700) 1.179,98 0,0800 8,26 11 35,90\nMS-CGR (CAP 70) 893,70 0,0800 8,

In [9]:
# Load embedding model
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5", 
encode_kwargs={"normalize_embeddings": True})

In [10]:
# Create a vectorstore
vectorstore = FAISS.from_documents(text, embeddings)

# Save the documents and embeddings
vectorstore.save_local("vectorstore.db")

In [11]:
retriever = vectorstore.as_retriever()

In [12]:
# Load the llm 
llm = ChatOpenAI(model_name="gpt-3.5-turbo")

# Define prompt template
template = """
You are an assistant for question-answering tasks.
Use the provided context only to answer the following question:

<context>
{context}
</context>

Question: {input}
"""

# Create a prompt template
prompt = ChatPromptTemplate.from_template(template)



  @deprecated('1.4.0')


In [13]:
prompt

ChatPromptTemplate(input_variables=['context', 'input'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], template='\nYou are an assistant for question-answering tasks.\nUse the provided context only to answer the following question:\n\n<context>\n{context}\n</context>\n\nQuestion: {input}\n'))])

In [14]:
doc_chain = create_stuff_documents_chain(llm, prompt)

In [15]:
doc_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), config={'run_name': 'format_inputs'})
| ChatPromptTemplate(input_variables=['context', 'input'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], template='\nYou are an assistant for question-answering tasks.\nUse the provided context only to answer the following question:\n\n<context>\n{context}\n</context>\n\nQuestion: {input}\n'))])
| ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x00000274BBA88D60>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x00000274BBA8A770>, openai_api_key='sk-proj-mqAcM3b3RXMkXKaeTccST3BlbkFJk1amcJf4EcY2YJmgDKTO', openai_proxy='')
| StrOutputParser(), config={'run_name': 'stuff_documents_chain'})

In [16]:

chain = create_retrieval_chain(retriever, doc_chain)

In [17]:
chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000027480403220>), config={'run_name': 'retrieve_documents'})
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), config={'run_name': 'format_inputs'})
            | ChatPromptTemplate(input_variables=['context', 'input'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], template='\nYou are an assistant for question-answering tasks.\nUse the provided context only to answer the following question:\n\n<context>\n{context}\n</context>\n\nQuestion: {input}\n'))])
            | ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x00000274BBA

In [31]:
# User query 
response = chain.invoke({"input": "quanto custa mandar 1000kg do rio grande do norte para blumenau?"})

# Get the Answer only
response['answer']

'Para mandar 1000kg do Rio Grande do Norte para Blumenau, considerando o Grupo Destino FR. R$ por t/m³ de TO-GRI (CAP 70) que tem o valor de R$ 1.060,30 por tonelada/m³ e a fração de 100kg que tem o valor de R$ 56,75, o custo total seria de:\n\nCusto por tonelada/m³: R$ 1.060,30\nCusto por 100kg: R$ 56,75\n\nComo temos 1000kg, podemos calcular o custo total da seguinte forma:\n\nCusto por tonelada/m³: R$ 1.060,30\nCusto por 100kg: R$ 56,75\n1000kg = 10 * 100kg\n\nCusto total = (10 * R$ 56,75) + R$ 1.060,30\nCusto total = R$ 567,50 + R$ 1.060,30\nCusto total = R$ 1.627,80\n\nPortanto, o custo para enviar 1000kg do Rio Grande do Norte para Blumenau seria de R$ 1.627,80.'