# RAG

In [1]:
from langchain_ollama import OllamaEmbeddings
from langchain_community.vectorstores.faiss import FAISS
from langchain_community.document_loaders.pdf import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [2]:
# PDF files that make up the knowledge base.
caminhos = ["samples/LangChain.pdf"]

# Read each PDF and accumulate everything its loader returns.
paginas = []
for caminho in caminhos:
    loader = PyPDFLoader(caminho)
    paginas += loader.load()

# Split into chunks of at most 1000 characters with 100 characters of overlap.
recur_split = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ".", " ", ""],
    chunk_overlap=100,
    chunk_size=1000,
)
documents = recur_split.split_documents(paginas)

# Strip the folder name from each chunk's source and tag it with a running id.
for i, doc in enumerate(documents):
    doc.metadata.update(
        source=doc.metadata['source'].replace('samples/', ''),
        doc_id=i,
    )

In [None]:
# Embed every chunk with the local Ollama "llama2" model and index the
# resulting vectors in a FAISS store.
embeddings = OllamaEmbeddings(model="llama2")
vectorstore = FAISS.from_documents(documents=documents, embedding=embeddings)

In [None]:
from langchain_core.prompts import ChatPromptTemplate

# Template with two placeholders: {contexto} (retrieved text) and {pergunta}
# (the user's question). The whitespace-only lines and the four-space indents
# are part of the template text and are spelled out explicitly here.
prompt = ChatPromptTemplate.from_template(
    'Responda as perguntas se baseando no contexto fornecido.\n'
    '    \n'
    '    contexto: {contexto}\n'
    '    \n'
    '    pergunta: {pergunta}'
)

In [None]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough


# MMR retrieval returning 5 documents chosen from 25 fetched candidates.
mmr_kwargs = {'k': 5, 'fetch_k': 25}
retriever = vectorstore.as_retriever(search_type='mmr', search_kwargs=mmr_kwargs)

# Fan the incoming question out to both branches: it is forwarded unchanged as
# 'pergunta' and also sent to the retriever to produce 'contexto'.
setup = RunnableParallel(
    pergunta=RunnablePassthrough(),
    contexto=retriever,
)

In [None]:
# Run only the retrieval setup to inspect what it produces for a question.
input_ = setup.invoke('O que é a OpenAI?')
input_  # bare expression so the notebook displays the result

In [None]:
def join_documents(input):
    """Collapse the retrieved documents into a single context string.

    Args:
        input: Mapping with a ``'contexto'`` entry holding an iterable of
            objects that expose ``page_content``. Any other entries are
            passed through unchanged. (The name shadows the builtin but is
            kept so existing callers keep working.)

    Returns:
        A new dict with the same entries as ``input``, except that
        ``'contexto'`` is the documents' ``page_content`` values joined by
        blank lines (an empty string when there are no documents).

    Raises:
        KeyError: If ``input`` has no ``'contexto'`` entry.
    """
    contexto = '\n\n'.join(doc.page_content for doc in input['contexto'])
    # Build a fresh dict instead of overwriting the entry in place, so the
    # caller's mapping (and its document list) is left untouched.
    return {**input, 'contexto': contexto}

# Same fan-out as before (question passthrough + retrieval), now followed by
# join_documents so 'contexto' reaches the prompt as one string.
setup = RunnableParallel(
    pergunta=RunnablePassthrough(),
    contexto=retriever,
) | join_documents

In [None]:
# Run the setup (retrieval piped into join_documents) and show only the
# resulting context entry.
input_ = setup.invoke('O que é a LangChain?')
input_['contexto']  # bare expression so the notebook displays the context

In [None]:
from langchain_ollama.chat_models import ChatOllama

# Chat model served by the local Ollama instance.
llm = ChatOllama(model="llama2")

# Full pipeline: build {pergunta, contexto} -> fill the prompt -> ask the model.
chain = setup | prompt | llm
chain.invoke('O que é a LangChain?')

In [None]:
# Consolidated RAG pipeline: prompt, retriever, context joining and chat model.

# Template with two placeholders: {contexto} (retrieved text) and {pergunta}
# (the user's question). The whitespace-only lines and the four-space indents
# are part of the template text and are spelled out explicitly here.
prompt = ChatPromptTemplate.from_template(
    'Responda as perguntas se baseando no contexto fornecido.\n'
    '    \n'
    '    contexto: {contexto}\n'
    '    \n'
    '    pergunta: {pergunta}'
)

# MMR retrieval returning 5 documents chosen from 25 fetched candidates.
retriever = vectorstore.as_retriever(search_type='mmr', search_kwargs={'k': 5, 'fetch_k': 25})

# Forward the question unchanged as 'pergunta', retrieve documents for it as
# 'contexto', then join those documents into a single string.
setup = RunnableParallel({
    'pergunta': RunnablePassthrough(),
    'contexto': retriever
}) | join_documents

# langchain_ollama's ChatOllama requires an explicit model name (there is no
# default), so use the same model as the earlier chain.
chain = setup | prompt | ChatOllama(model="llama2")
chain.invoke('O que é a LangChain?')