In [2]:
import os

from langchain_community.document_loaders import ArxivLoader

from langchain_community.document_loaders import TextLoader
from langchain_pinecone import PineconeVectorStore
from langchain_huggingface import HuggingFaceEmbeddings

from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

from langchain_groq.chat_models import ChatGroq
from langchain_ollama import ChatOllama

import uuid

from dotenv import load_dotenv, find_dotenv

# Load the API keys from .env
load_dotenv(find_dotenv(), override=True)


from src.vectordb.create_vectordb import PinconeVectorDb

  from tqdm.autonotebook import tqdm


In [3]:
pc = PinconeVectorDb()
pc.create_pinecone_index(index_name='test-arxiv')

Index test-arxiv already exists.
Index Stats:
{'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}


In [4]:
embeddings = HuggingFaceEmbeddings(model_name=os.getenv('EMBEDDING_MODEL_NAME'))
vectorstore = PineconeVectorStore(index=pc.index, embedding=embeddings)
vectorstore

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x27d108572b0>

In [5]:
loader = ArxivLoader(query=os.getenv('QUERY'), load_max_docs=30)
documents = loader.load()
documents

[Document(metadata={'Published': '2024-08-05', 'Title': 'RAG Foundry: A Framework for Enhancing LLMs for Retrieval Augmented Generation', 'Authors': 'Daniel Fleischer, Moshe Berchansky, Moshe Wasserblat, Peter Izsak', 'Summary': 'Implementing Retrieval-Augmented Generation (RAG) systems is inherently\ncomplex, requiring deep understanding of data, use cases, and intricate design\ndecisions. Additionally, evaluating these systems presents significant\nchallenges, necessitating assessment of both retrieval accuracy and generative\nquality through a multi-faceted approach. We introduce RAG Foundry, an\nopen-source framework for augmenting large language models for RAG use cases.\nRAG Foundry integrates data creation, training, inference and evaluation into a\nsingle workflow, facilitating the creation of data-augmented datasets for\ntraining and evaluating large language models in RAG settings. This integration\nenables rapid prototyping and experimentation with various RAG techniques,\na

In [6]:
len(documents)

3

In [7]:
doc_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=64)
docs = doc_splitter.split_documents(documents)
docs

[Document(metadata={'Published': '2024-08-05', 'Title': 'RAG Foundry: A Framework for Enhancing LLMs for Retrieval Augmented Generation', 'Authors': 'Daniel Fleischer, Moshe Berchansky, Moshe Wasserblat, Peter Izsak', 'Summary': 'Implementing Retrieval-Augmented Generation (RAG) systems is inherently\ncomplex, requiring deep understanding of data, use cases, and intricate design\ndecisions. Additionally, evaluating these systems presents significant\nchallenges, necessitating assessment of both retrieval accuracy and generative\nquality through a multi-faceted approach. We introduce RAG Foundry, an\nopen-source framework for augmenting large language models for RAG use cases.\nRAG Foundry integrates data creation, training, inference and evaluation into a\nsingle workflow, facilitating the creation of data-augmented datasets for\ntraining and evaluating large language models in RAG settings. This integration\nenables rapid prototyping and experimentation with various RAG techniques,\na

In [8]:
len(docs)

334

In [10]:
uuids = [str(uuid.uuid4()) for _ in range(len(documents))]

vectorstore.add_documents(docs, ids=uuids)

['979edb0b-d10d-4575-a628-481ab1b954f2',
 'dddff9d0-52b5-46db-82b7-23d591f9a65c',
 'e3a16818-b7d0-4496-a5fa-8471a2e7c1fd']

In [11]:
vectorstore

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x27d108572b0>

In [12]:
retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 3})
retriever

VectorStoreRetriever(tags=['PineconeVectorStore', 'HuggingFaceEmbeddings'], vectorstore=<langchain_pinecone.vectorstores.PineconeVectorStore object at 0x0000027D108572B0>, search_type='mmr', search_kwargs={'k': 3})

In [13]:
retriever.invoke("latest trends in rag")

[Document(metadata={'Authors': 'Daniel Fleischer, Moshe Berchansky, Moshe Wasserblat, Peter Izsak', 'Published': '2024-08-05', 'Summary': 'Implementing Retrieval-Augmented Generation (RAG) systems is inherently\ncomplex, requiring deep understanding of data, use cases, and intricate design\ndecisions. Additionally, evaluating these systems presents significant\nchallenges, necessitating assessment of both retrieval accuracy and generative\nquality through a multi-faceted approach. We introduce RAG Foundry, an\nopen-source framework for augmenting large language models for RAG use cases.\nRAG Foundry integrates data creation, training, inference and evaluation into a\nsingle workflow, facilitating the creation of data-augmented datasets for\ntraining and evaluating large language models in RAG settings. This integration\nenables rapid prototyping and experimentation with various RAG techniques,\nallowing users to easily generate datasets and train RAG models using internal\nor specializ

In [14]:
# llm = ChatGroq(model="llama-3.1-70b-versatile",
#                       stop_sequences="[end]",
#                       temperature=0.)



llm = ChatOllama(
    model="llama3.2:latest",
    temperature=0.,
    
)

In [15]:
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

In [16]:
question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

results = rag_chain.invoke({"input": "What are the latest trends in rag?"})

results

{'input': 'What are the latest trends in rag?',
 'context': [Document(metadata={'Authors': 'Daniel Fleischer, Moshe Berchansky, Moshe Wasserblat, Peter Izsak', 'Published': '2024-08-05', 'Summary': 'Implementing Retrieval-Augmented Generation (RAG) systems is inherently\ncomplex, requiring deep understanding of data, use cases, and intricate design\ndecisions. Additionally, evaluating these systems presents significant\nchallenges, necessitating assessment of both retrieval accuracy and generative\nquality through a multi-faceted approach. We introduce RAG Foundry, an\nopen-source framework for augmenting large language models for RAG use cases.\nRAG Foundry integrates data creation, training, inference and evaluation into a\nsingle workflow, facilitating the creation of data-augmented datasets for\ntraining and evaluating large language models in RAG settings. This integration\nenables rapid prototyping and experimentation with various RAG techniques,\nallowing users to easily generat

In [17]:
question_answer_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, say that you don't know. Use three sentences maximum and keep the answer concise.\n\n{context}"), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={})])
| ChatOllama(model='llama3.2:latest', temperature=0.0)
| StrOutputParser(), kwargs={}, config={'run_name': 'stuff_documents_chain'}, config_factories=[])

In [18]:
rag_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['PineconeVectorStore', 'HuggingFaceEmbeddings'], vectorstore=<langchain_pinecone.vectorstores.PineconeVectorStore object at 0x0000027D108572B0>, search_type='mmr', search_kwargs={'k': 3}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved