In [1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma

# Load Environment Variables

In [2]:
import os
from dotenv import load_dotenv

# Read key/value pairs from a local .env file into the process environment
# (presumably this is where the OpenAI API key used by later cells comes from — TODO confirm)
load_dotenv()

True

# Document Index - Vector Store
***
* Load Documents
* Split the text
* Embed the texts
* Store them in a vector store

In [3]:
# Document loading parameters
# Chunk lengths are measured with len() by the text splitter configured further down,
# so both values below are character counts.
# Upper bound on the length of a single chunk
CHUNK_SIZE = 500
# Length shared between neighbouring chunks
CHUNK_OVERLAP = 100

# Folder that is scanned for the input documents
DOCS_DIRECTORY = 'docs/'

## Load Documents

In [4]:
# Load every PDF found directly inside DOCS_DIRECTORY.
# PyPDFLoader yields one Document per page, so `docs` ends up holding one entry per page.
docs = []
# sorted() gives a deterministic load order (os.listdir order is platform-dependent)
for fp in sorted(os.listdir(DOCS_DIRECTORY)):
    pdf_path = os.path.join(DOCS_DIRECTORY, fp)  # works with or without a trailing slash

    # Skip anything that is not a PDF file (hidden files such as .DS_Store, sub-folders,
    # notes, ...) instead of handing it to the PDF parser.
    if not (fp.lower().endswith('.pdf') and os.path.isfile(pdf_path)):
        continue

    loader = PyPDFLoader(file_path=pdf_path)
    docs.extend(loader.lazy_load())

## Split Documents

In [5]:
# Splitter settings: sizes come from the configuration cell, lengths are counted with len(),
# and separators are treated as plain strings rather than regular expressions.
splitter_options = {
    "chunk_size": CHUNK_SIZE,
    "chunk_overlap": CHUNK_OVERLAP,
    "length_function": len,
    "is_separator_regex": False,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Break the loaded page-level documents into chunks
chunk_docs = text_splitter.split_documents(docs)

In [6]:
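# Optional sanity check (kept disabled): embed the chunks directly.
# Not required for the pipeline, because the vector store created below embeds the chunks itself.
# Note: embed_documents expects a list of strings, so pass each chunk's page_content.
# embedding_model = OpenAIEmbeddings()
# embed_docs = embedding_model.embed_documents([doc.page_content for doc in chunk_docs])

# print(f"Embedded {len(embed_docs)} document chunks")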

## Create Vector Store
* Add documents to the store
* Embed them with the provided embedding model
* Persist the vector store
* Given a query, the vector store embeds it with the same embedding model and retrieves the most similar documents

In [9]:
# Create Vector Store - Embed the document chunks and add them to the vector store.
# The store is persisted on disk, so re-running this cell must not index the same chunks
# again: that would re-embed everything and fill the collection with duplicates, which then
# show up as repeated hits in similarity search.
COLLECTION_NAME = "doc-collection"
PERSIST_DIRECTORY = './chromadb'

embedding_model = OpenAIEmbeddings()

# Open (or create, if absent) the persisted collection without adding anything yet
vector_store = Chroma(collection_name=COLLECTION_NAME,
                      embedding_function=embedding_model,
                      persist_directory=PERSIST_DIRECTORY)

# Only index when the collection is still empty. To rebuild the index after the documents
# change, delete PERSIST_DIRECTORY and run this cell again.
if not vector_store.get(limit=1)["ids"]:
    vector_store = Chroma.from_documents(collection_name=COLLECTION_NAME,
                                         documents=chunk_docs,
                                         embedding=embedding_model,
                                         persist_directory=PERSIST_DIRECTORY)

In [12]:
# Retrieval sanity check: fetch the two chunks closest to a sample question
sample_query = 'What is the attention mechanism?'
similar_docs = vector_store.similarity_search(query=sample_query, k=2)

for hit in similar_docs:
    # Show the whole Document (content and metadata), followed by blank lines as a separator
    print(hit)
    print("\n\n\n")

page_content='Attention mechanisms have become an integral part of compelling sequence modeling and transduc-
tion models in various tasks, allowing modeling of dependencies without regard to their distance in
the input or output sequences [2, 16]. In all but a few cases [22], however, such attention mechanisms
are used in conjunction with a recurrent network.
In this work we propose the Transformer, a model architecture eschewing recurrence and instead' metadata={'author': 'Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Łukasz Kaiser, Illia Polosukhin', 'book': 'Advances in Neural Information Processing Systems 30', 'created': '2017', 'creationdate': '', 'creator': 'PyPDF', 'date': '2017', 'description': 'Paper accepted and presented at the Neural Information Processing Systems Conference (http://nips.cc/)', 'description-abstract': 'The dominant sequence transduction models are based on complex recurrent orconvolutional neural networks in an e

# Single Q&A RAG
***
* Create a chain that reads the user input -> retrieves the relevant docs from the vector store -> adds them to the LLM context -> generates a response

In [None]:
from langchain_openai import ChatOpenAI
from langchain.schema.runnable import RunnablePassthrough
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate

In [14]:
# Name of the chat model handed to ChatOpenAI when the LLM is created
CHAT_MODEL = "gpt-4o-mini"

In [None]:
def docs_2_str(docs):
    """Merge documents into one string.

    Takes an iterable of objects exposing a ``page_content`` attribute and returns
    their contents joined by a blank line (``"\\n\\n"``), in the given order.
    """
    page_texts = []
    for item in docs:
        page_texts.append(item.page_content)
    return "\n\n".join(page_texts)

def message_to_text(message):
    """Return the value of the message's ``content`` attribute."""
    text = message.content
    return text

## Retriever

In [None]:
# Wrap the vector store in a retriever object. Unlike the raw store, the retriever can be
# composed with `|` inside the RAG chain, where it receives the user query and returns
# the matching documents.
retriever = vector_store.as_retriever()

## Prompt template using retrieved context and user query

In [56]:
# Build the prompt sent to the LLM: the system turn carries the retrieved context and the
# answering rules, the human turn carries the user's question.
human_message = "Answer this: {query}"

system_message = """
You are a friendly assistant in answering the user's queries. For answering the queries, you have access to the following context:
{context}

INSTRUCTIONS:
1. Use only the provided context to answer the query.
2. If you do not find the necessary information to answer the question in the provided context, then respond with the following: <I do not have the necessary context to answer this query>.
"""

prompt_messages = [
    ('system', system_message),
    ('human', human_message),
]
template = ChatPromptTemplate(prompt_messages)
template

ChatPromptTemplate(input_variables=['context', 'query'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template="\nYou are a friendly assistant in answering the user's queries. For answering the queries, you have access to the following context:\n{context}\n\nINSTRUCTIONS:\n1. Use only the provided context to answer the query.\n2. If you do not find the necessary information to answer the question in the provided context, then respond with the following: <I do not have the necessary context to answer this query>.\n"), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['query'], input_types={}, partial_variables={}, template='Answer this: {query}'), additional_kwargs={})])

## LLM to consume the context and respond to the user query

In [None]:
# Settings for the chat model that answers from the filled-in prompt
llm_options = dict(
    model=CHAT_MODEL,
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)
llm = ChatOpenAI(**llm_options)

## Final RAG chain

In [None]:
# Step 1: turn the raw question into the two prompt variables.
#   context - documents fetched by the retriever, flattened to a single string
#   query   - the question itself, passed through untouched
rag_inputs = {
    "context": retriever | docs_2_str,
    "query": RunnablePassthrough(),
}

# Step 2: prompt -> model -> plain-text answer
simple_rag_chain = rag_inputs | template | llm | message_to_text

In [54]:
# In-domain question: expected to be answerable from the indexed documents
simple_rag_chain.invoke("Who is rheumatoid arthiritis more prevalent in?")

'Rheumatoid arthritis is known to disproportionately affect women, similar to conditions such as psoriatic arthritis (PsA), lupus, and fibromyalgia.'

In [55]:
# Out-of-domain question: exercises the fallback reply that the system prompt prescribes
simple_rag_chain.invoke("What would be a good destination to travel to this summer?")

'<I do not have the necessary context to answer this query>'

In [57]:
# First turn of a two-turn exchange (same question as earlier)
simple_rag_chain.invoke("Who is rheumatoid arthiritis more prevalent in?")

'Rheumatoid arthritis is known to disproportionately affect women, similar to conditions such as psoriatic arthritis (PsA), lupus, and fibromyalgia.'

In [58]:
# Follow-up that only makes sense together with the previous turn. The chain is given just
# this string on each call, so the earlier question is not available to it.
simple_rag_chain.invoke("Okay, well how about men?")

'<I do not have the necessary context to answer this query>'

# Conversational RAG
* Ask follow up questions to the application
* Enable conversation style interactions
* Introduce memory

In [None]:
# Baseline with the memory-less chain: opening question
simple_rag_chain.invoke("Who is rheumatoid arthiritis more prevalent in?")

'Rheumatoid arthritis is known to disproportionately affect women, similar to conditions such as psoriatic arthritis (PsA), lupus, and fibromyalgia.'

In [None]:
# Baseline with the memory-less chain: follow-up question, sent without any conversation history
simple_rag_chain.invoke("How about men?")

'<I do not have the necessary context to answer this query>'