## Chat sobre documentos PDF: RAG con LangChain, Ollama y FAISS Vector Store

In [None]:
# pip install -U langchain-community faiss-cpu langchain-huggingface pymupdf tiktoken langchain-ollama python-dotenv

In [None]:
import os
import warnings
from dotenv import load_dotenv

# NOTE(review): KMP_DUPLICATE_LIB_OK is presumably set to work around the
# "duplicate OpenMP runtime" abort that faiss can trigger alongside other
# native libraries — confirm it is still needed.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
# Silence every warning category for the rest of the session.
warnings.filterwarnings("ignore")

# python-dotenv: pull key/value pairs from a .env file into the environment.
load_dotenv()

### Document Loader

In [2]:
from langchain_community.document_loaders import PyMuPDFLoader

# loader = PyMuPDFLoader("./dgi-dataset/*.pdf")

# docs = loader.load()

# Load every PDF that sits directly inside the dataset directory and gather
# whatever each loader returns into one flat list.
pdf_directory = "./dgi-dataset"
documents = []

for filename in os.listdir(pdf_directory):
    if not filename.endswith(".pdf"):
        continue
    pdf_path = os.path.join(pdf_directory, filename)
    documents.extend(PyMuPDFLoader(pdf_path).load())

In [None]:
# Inspect the first loaded entry.
documents[0]

In [None]:
# Keep a handle on the first loaded entry for ad-hoc inspection.
# `documents` is the list built by the loader cell above; `docs` is only
# created further down, so indexing it here fails on a top-to-bottom run.
doc = documents[0]
# print(doc.page_content)

In [5]:
import os

# Walk the dataset directory recursively and record the path of every file
# whose name ends in ".pdf".
pdfs = [
    os.path.join(root, file)
    for root, dirs, files in os.walk('dgi-dataset')
    for file in files
    if file.endswith('.pdf')
]

In [6]:
# Run each collected PDF path through PyMuPDFLoader and gather everything the
# loaders return into a single flat list.
docs = []
for pdf_path in pdfs:
    docs.extend(PyMuPDFLoader(pdf_path).load())

In [None]:
# Number of entries produced by the loaders (presumably one per PDF page).
len(docs)

### Document Chunking

In [8]:
from langchain_text_splitters import RecursiveCharacterTextSplitter


# Splitter configured for chunks of size 1000 with an overlap of 100.
# NOTE(review): sizes are presumably measured in characters (the splitter's
# default length function) — confirm.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

# Split every loaded document into chunks.
chunks = text_splitter.split_documents(docs)

In [None]:
# Compare how many documents were loaded with how many chunks were produced.
len(docs), len(chunks)

In [None]:
# Compare the text length of the first document with that of the first chunk.
len(docs[0].page_content), len(chunks[0].page_content)

In [None]:
import tiktoken

# Tokenizer that tiktoken associates with the "gpt-4o-mini" model name.
# NOTE(review): the chat model used later is served by Ollama, so these counts
# are presumably only a rough size estimate — confirm that is the intent.
encoding = tiktoken.encoding_for_model("gpt-4o-mini")

# Token counts for the first document and the first chunk.
len(encoding.encode(docs[0].page_content)), len(encoding.encode(chunks[0].page_content))

### Document Vector Embedding

In [12]:
from langchain_ollama import OllamaEmbeddings

import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore

In [14]:
# Embedding client for the 'nomic-embed-text' model on the local Ollama server.
embeddings = OllamaEmbeddings(model='nomic-embed-text', base_url="http://localhost:11434")

# Embed a probe sentence; its length is used below to size the FAISS index.
single_vector = embeddings.embed_query("this is some text data")


In [15]:
# Length of the embedding returned for the probe sentence.
len(single_vector)

768

In [None]:
# Flat L2 FAISS index sized to the length of the probe embedding.
index = faiss.IndexFlatL2(len(single_vector))
# Show the index's `ntotal` and `d` attributes.
index.ntotal, index.d

In [17]:
# Wrap the FAISS index in a LangChain vector store, starting from a fresh
# in-memory docstore and an empty index-position -> docstore-id mapping.
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={}
)

In [None]:
# Number of chunks about to be added to the vector store.
len(chunks)

In [19]:
# help(vector_store)

In [20]:
# Add every chunk to the vector store; the return value is kept as `ids`
# (presumably the ids assigned to the stored chunks — confirm).
ids = vector_store.add_documents(documents=chunks)

In [None]:
# NOTE(review): only the last expression of a notebook cell is displayed, so
# this first line presumably shows nothing — confirm whether it is still wanted.
vector_store.index_to_docstore_id
# Number of ids returned by add_documents.
len(ids)

In [None]:
# Persist the vector store to a local folder named after db_name.
db_name = "normativas-dgi"
vector_store.save_local(db_name)

# Reload the persisted store with the same embeddings object.
# NOTE(review): allow_dangerous_deserialization=True opts in to loading
# serialized (presumably pickled) data; only point this at folders written by
# this notebook, never at untrusted files.
new_vector_store = FAISS.load_local(db_name, embeddings=embeddings, allow_dangerous_deserialization=True)
# Number of entries in the reloaded store's index -> docstore-id mapping.
len(new_vector_store.index_to_docstore_id)

### Retrieval

In [None]:
# Ad-hoc similarity search issued directly against the vector store.
# NOTE(review): this rebinds `docs`, which earlier held the loaded PDF content;
# re-running the chunking cell after this one would split the search hits.
question = "¿Cuándo debo hacer una nota de crédito?"
docs = vector_store.search(query=question, search_type='similarity')

# Print the text of each hit, followed by blank lines as a visual separator.
for doc in docs:
    print(doc.page_content)
    print("\n\n")

In [24]:
# Retriever view of the vector store using "mmr" search with k=3 and
# fetch_k=100.
# NOTE(review): per the LangChain docs lambda_mult=1 means minimum diversity,
# so MMR presumably degenerates to plain similarity ranking here — confirm
# this value is intended.
retriever = vector_store.as_retriever(
    search_type="mmr",
    search_kwargs={
        'k': 3,
        'fetch_k': 100,
        'lambda_mult': 1,
    },
)

In [None]:
# Fetch the retriever's hits for the current question and print each one,
# followed by blank lines as a visual separator.
docs = retriever.invoke(question)

for doc in docs:
    print(doc.page_content)
    print("\n\n")


In [26]:


# Re-state the question and fetch its retriever hits again (same question
# string as the similarity-search cell above).
question = "¿Cuándo debo hacer una nota de crédito?"
docs = retriever.invoke(question)


### RAG con LLAMA 3.2 sobre OLLAMA

In [38]:
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate

from langchain_ollama import ChatOllama

from langchain_openai import ChatOpenAI

In [76]:
# Chat model "llama3.2:1b" served by the local Ollama instance.
model = ChatOllama(model="llama3.2:1b", base_url="http://localhost:11434")

# Alternative hosted model, kept for quick switching:
#model = ChatOpenAI(model="gpt-4o")

#model.invoke("hi")

In [77]:
# Pull the "rlm/rag-prompt" prompt from the LangChain hub.
# NOTE(review): the following cell rebinds `prompt` to a custom template before
# the chain is built, so this pull looks redundant — confirm it is still needed.
prompt = hub.pull("rlm/rag-prompt")

In [85]:
# Custom RAG prompt text with {question} and {context} placeholders; it asks
# for bullet-point answers drawn only from the supplied context.
prompt = """
    You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question.
    If you don't know the answer, just say that you don't know. 
    Answer in bullet points. Make sure your answer is relevant to the question and it is answered from the context only.
    Question: {question} 
    Context: {context} 
    Answer:
"""

# Rebind `prompt` from the raw string to a chat prompt template built from it.
prompt = ChatPromptTemplate.from_template(prompt)

In [86]:
def format_docs(docs):
    """Concatenate the text of the given documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values, in input order, separated by a blank line.
    """
    contents = (document.page_content for document in docs)
    return "\n\n".join(contents)

# print(format_docs(docs))

In [87]:
# RAG pipeline: the incoming question is passed through unchanged as
# "question", while "context" is the retriever's output joined by format_docs;
# both feed the prompt, then the model, then the string output parser.
rag_chain = (
    {"context": retriever|format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

In [None]:
# Run the chain on the current `question` and print the result.
output = rag_chain.invoke(question)
print(output)


In [89]:
import gradio as gr # oh yeah!

In [90]:
# Callback for the UI: answers one question with the RAG pipeline. The complete
# answer is returned in one piece; nothing is streamed.

def chatbot(question):
    """Answer ``question`` by running it through the RAG chain.

    The chain is assembled from the module-level ``retriever``,
    ``format_docs``, ``prompt`` and ``model`` each time the function is called.

    Args:
        question: The question text to answer.

    Returns:
        The result of invoking the chain, i.e. the output of
        ``StrOutputParser``.
    """
    # Retrieval happens inside the chain via `retriever | format_docs`; a
    # separate retriever.invoke() here would only repeat the same lookup and
    # throw the result away.
    chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | model
        | StrOutputParser()
    )

    return chain.invoke(question)

    # result = ""
    # for chunk in stream:
    #     result += chunk.choices[0].delta.content or ""
    #     yield result


In [None]:
# Web UI: one multi-line input box wired to `chatbot`, one output box for the
# answer, with flagging turned off.
view = gr.Interface(
    fn=chatbot,
    inputs=[gr.Textbox(label="Your message:", lines=6)],
    outputs=[gr.Textbox(label="Response:", lines=20)],
    flagging_mode="never",
    # The theme has to be handed to the interface; a bare `gr.themes.Ocean()`
    # expression just builds a theme object and discards it.
    theme=gr.themes.Ocean(),
)

# NOTE(review): share=True asks Gradio for a public share link, which would
# expose this locally served app outside the machine — confirm that is intended.
view.launch(share=True)