In [1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma

# Load Environment Variables

In [2]:
import os
from dotenv import load_dotenv

load_dotenv()

True

# Document Index - Vector Store
***
* Load Documents
* Split the text
* Embed the texts
* Store them in a vector store

In [3]:
# Document loading parameters
CHUNK_SIZE = 500
CHUNK_OVERLAP = 100

DOCS_DIRECTORY = 'docs/'

## Load Documents

In [4]:
docs = []
for fp in os.listdir(DOCS_DIRECTORY):
    loader = PyPDFLoader(file_path=DOCS_DIRECTORY+fp) # Each page is loaded as a different doc

    docs_lazy = loader.lazy_load()

    for doc in docs_lazy:
        docs.append(doc)

## Split Documents

In [5]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    length_function=len,
    is_separator_regex=False,
)

chunk_docs = text_splitter.split_documents(docs)

In [6]:
# embedding_model = OpenAIEmbeddings()
# embed_docs = embedding_model.embed_documents(chunk_docs)

# print(f"Embedded {len(embed_docs)} document chunks")

## Create Vector Store
* Add documents to the store
* Embed them with the provided embedding model
* Persist the vector store
* The vector store will also retrieve the most similar documents given a query. It will embed it with the same embedding model provided

In [9]:
# Create Vector Store - Embed the document chunks and add them to the vector store
embedding_model = OpenAIEmbeddings()

vector_store = Chroma.from_documents(collection_name="doc-collection",
                                     documents=chunk_docs,
                                     embedding=embedding_model,
                                     persist_directory='./chromadb')

In [12]:
similar_docs = vector_store.similarity_search(query='What is the attention mechanism?',
                                            k=2)

for document in similar_docs:
    print(document)
    print("\n\n\n")

page_content='Attention mechanisms have become an integral part of compelling sequence modeling and transduc-
tion models in various tasks, allowing modeling of dependencies without regard to their distance in
the input or output sequences [2, 16]. In all but a few cases [22], however, such attention mechanisms
are used in conjunction with a recurrent network.
In this work we propose the Transformer, a model architecture eschewing recurrence and instead' metadata={'author': 'Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Łukasz Kaiser, Illia Polosukhin', 'book': 'Advances in Neural Information Processing Systems 30', 'created': '2017', 'creationdate': '', 'creator': 'PyPDF', 'date': '2017', 'description': 'Paper accepted and presented at the Neural Information Processing Systems Conference (http://nips.cc/)', 'description-abstract': 'The dominant sequence transduction models are based on complex recurrent orconvolutional neural networks in an e

# Single Q&A RAG
***
* Create a Chain, which reads the user input -> Retrieves the relevant docs from the vector store -> Adds them to the LLM context and generates a response

In [None]:
from langchain_openai import ChatOpenAI
from langchain.schema.runnable import RunnablePassthrough
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate

In [14]:
CHAT_MODEL = "gpt-4o-mini"

In [None]:
def docs_2_str(docs):
    return "\n\n".join([doc.page_content for doc in docs])

def message_to_text(message):
    return message.content

## Retriever

In [None]:
# Create retriever from the vector store, so that it can be included in the chain as it will get access to the invoke method
retriever = vector_store.as_retriever()

## Prompt template using retrieved context and user query

In [56]:
# Formulate the input to the LLM. Equip it with the context from the retriever, along with the user query
system_message = """
You are a friendly assistant in answering the user's queries. For answering the queries, you have access to the following context:
{context}

INSTRUCTIONS:
1. Use only the provided context to answer the query.
2. If you do not find the necessary information to answer the question in the provided context, then respond with the following: <I do not have the necessary context to answer this query>.
"""

human_message = "Answer this: {query}"

template = ChatPromptTemplate([('system', system_message),
                               ('human', human_message)])
template

ChatPromptTemplate(input_variables=['context', 'query'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template="\nYou are a friendly assistant in answering the user's queries. For answering the queries, you have access to the following context:\n{context}\n\nINSTRUCTIONS:\n1. Use only the provided context to answer the query.\n2. If you do not find the necessary information to answer the question in the provided context, then respond with the following: <I do not have the necessary context to answer this query>.\n"), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['query'], input_types={}, partial_variables={}, template='Answer this: {query}'), additional_kwargs={})])

## LLM to consume the context and respond to the user query

In [None]:
llm = ChatOpenAI(
            model=CHAT_MODEL,
            temperature=0,
            max_tokens=None,
            timeout=None,
            max_retries=2,
        )

## Final RAG chain

In [None]:
# Runnable pass through would just pass what we give in the input invoke
simple_rag_chain = ({"context" : retriever | docs_2_str, "query": RunnablePassthrough()}
                    | template
                    | llm
                    | message_to_text)

In [54]:
simple_rag_chain.invoke("Who is rheumatoid arthiritis more prevalent in?")

'Rheumatoid arthritis is known to disproportionately affect women, similar to conditions such as psoriatic arthritis (PsA), lupus, and fibromyalgia.'

In [55]:
simple_rag_chain.invoke("What would be a good destination to travel to this summer?")

'<I do not have the necessary context to answer this query>'

In [57]:
simple_rag_chain.invoke("Who is rheumatoid arthiritis more prevalent in?")

'Rheumatoid arthritis is known to disproportionately affect women, similar to conditions such as psoriatic arthritis (PsA), lupus, and fibromyalgia.'

In [58]:
simple_rag_chain.invoke("Okay, well how about men?")

'<I do not have the necessary context to answer this query>'

# Conversational RAG
* Ask follow up questions to the application
* Enable conversation style interactions
* Introduce memory

In [80]:
from langchain_core.prompts import MessagesPlaceholder
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage

In [None]:
simple_rag_chain.invoke("Who is rheumatoid arthiritis more prevalent in?")

'Rheumatoid arthritis is known to disproportionately affect women, similar to conditions such as psoriatic arthritis (PsA), lupus, and fibromyalgia.'

In [None]:
simple_rag_chain.invoke("How about men?")

'<I do not have the necessary context to answer this query>'

In [105]:
contextualized_query_prompt = """
Given a chat history and the latest user question
which might reference context in the chat history,
formulate a standalone question which can be understood
without the chat history. Do NOT answer the question,
just reformulate it if needed and otherwise return it as is
"""

contextualized_query_template = ChatPromptTemplate.from_messages([('system', contextualized_query_prompt),
                                                         MessagesPlaceholder('chat_history'),
                                                         ('human', "{input}")])

In [None]:
contextualized_query_chain = contextualized_query_template| llm | message_to_text

In [None]:
contextualized_query_chain.invoke({"input": "So, by when we expect it to be overtaken?", "chat_history": []})

'When do we expect it to be surpassed?'

In [None]:
llm_chain = llm | message_to_text

chat_history = []

query1 = "What is the most populous country in the world?"
answer1 = llm_chain.invoke(query1)
print(answer1)


chat_history.extend([HumanMessage(query1),
                     AIMessage(answer1)])

query2 = "So, by when we expect it to be overtaken?"
rephrased_query2 = contextualized_query_chain.invoke({"input": query2, "chat_history": chat_history})
print(rephrased_query2)

As of my last update in October 2023, China is the most populous country in the world, followed closely by India. However, demographic trends indicate that India may surpass China in population soon, if it hasn't already. For the most current population figures, it's best to consult the latest data from reliable sources such as the United Nations or the World Bank.
When is India expected to overtake China in population?


## History Aware Retriever
* We will use a prebuilt chain provided by langchain, but the concept is the same as we saw earlier

In [137]:
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

In [113]:
# This retriever adds information to the query from the chat history and then uses that for retrieval
history_aware_retriever = create_history_aware_retriever(llm=llm, 
                                                         retriever=retriever,
                                                         prompt=contextualized_query_template)

In [128]:
history_aware_retriever

RunnableBinding(bound=RunnableBranch(branches=[(RunnableLambda(lambda x: not x.get('chat_history', False)), RunnableLambda(lambda x: x['input'])
| VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x000001A8DBFF5910>, search_kwargs={}))], default=ChatPromptTemplate(input_variables=['chat_history', 'input'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChun

In [131]:
history_aware_retriever.invoke({"input":"Transformers", "chat_history":[]})

[Document(id='483990e5-e913-4397-8c65-ede4b3b31787', metadata={'author': 'Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N. Gomez, Łukasz Kaiser, Illia Polosukhin', 'book': 'Advances in Neural Information Processing Systems 30', 'created': '2017', 'creationdate': '', 'creator': 'PyPDF', 'date': '2017', 'description': 'Paper accepted and presented at the Neural Information Processing Systems Conference (http://nips.cc/)', 'description-abstract': 'The dominant sequence transduction models are based on complex recurrent orconvolutional neural networks in an encoder and decoder configuration. The best performing such models also connect the encoder and decoder through an attentionm echanisms.  We propose a novel, simple network architecture based solely onan attention mechanism, dispensing with recurrence and convolutions entirely.Experiments on two machine translation tasks show these models to be superiorin quality while being more parallelizable and requi

## Stuff Documents Chain
* Takes a list of documents as input and feeds them to an LLM

In [140]:
# conversational_rag = ({"context" : history_aware_retriever | docs_2_str, "query": RunnablePassthrough.assign(query=lambda x: x['input'])}
#                     | template
#                     | llm
#                     | message_to_text
#                     )

qa_system_prompt = """
You have access to the following context:
{context}

Instructions:
1. Answer the user's query using the provided context
2. Do not make up any responses, only provide an answer if you can find related content in the provided context
"""

qa_prompt = ChatPromptTemplate.from_messages([('system', qa_system_prompt),
                                              ('human', "{input}")])

# This is a chain which consumes a list of documents and passes it to an llm
answer_chain = create_stuff_documents_chain(llm, qa_prompt)

## RAG Chain

In [141]:
rag_chain = create_retrieval_chain(retriever=history_aware_retriever,
                                combine_docs_chain=answer_chain)

In [142]:
chat_history = []

query1 = "Who is prone to rheumatoid arthritis?"
answer1 = rag_chain.invoke({"input": query1, "chat_history":chat_history})
print(answer1)

{'input': 'Who is prone to rheumatoid arthritis?', 'chat_history': [], 'context': [Document(id='086737cd-9154-4727-b037-d0c7755d544b', metadata={'creationdate': '2024-09-03T10:49:12+02:00', 'creator': 'Adobe InDesign 19.5 (Macintosh)', 'moddate': '2024-09-03T10:49:12+02:00', 'page': 0, 'page_label': '196', 'producer': 'Adobe PDF Library 17.0', 'source': 'docs/rheumatoid_arthritis_in_women.pdf', 'total_pages': 9, 'trapped': '/False'}, page_content='reactive arthritis, and arthritis associated \nwith inflammatory bowel disease. Of these, \nPsA, with its dermatological manifesta-\ntions, particularly psoriasis, represents a \nsignificant challenge for women. Psoriasis, \na skin disease characterized by skin mani -\nfestations ranging from mild, localized \nplaques to severe, generalized forms, has a \ndifferent clinical course and response to \ntreatment in women and in men (2). This \ndifference may be due to several factors,'), Document(id='d1a0bd84-c52e-4a11-9fd1-d7ff30bb7c0f', metadat

In [145]:
answer1

{'input': 'Who is prone to rheumatoid arthritis?',
 'chat_history': [],
 'context': [Document(id='086737cd-9154-4727-b037-d0c7755d544b', metadata={'creationdate': '2024-09-03T10:49:12+02:00', 'creator': 'Adobe InDesign 19.5 (Macintosh)', 'moddate': '2024-09-03T10:49:12+02:00', 'page': 0, 'page_label': '196', 'producer': 'Adobe PDF Library 17.0', 'source': 'docs/rheumatoid_arthritis_in_women.pdf', 'total_pages': 9, 'trapped': '/False'}, page_content='reactive arthritis, and arthritis associated \nwith inflammatory bowel disease. Of these, \nPsA, with its dermatological manifesta-\ntions, particularly psoriasis, represents a \nsignificant challenge for women. Psoriasis, \na skin disease characterized by skin mani -\nfestations ranging from mild, localized \nplaques to severe, generalized forms, has a \ndifferent clinical course and response to \ntreatment in women and in men (2). This \ndifference may be due to several factors,'),
  Document(id='d1a0bd84-c52e-4a11-9fd1-d7ff30bb7c0f', met

In [146]:
chat_history.extend([HumanMessage(query1),
                     AIMessage(answer1['answer'])])

query2 = "How about men?"
answer2 = rag_chain.invoke({"input": query2, "chat_history":chat_history})
print(answer2['answer'])

The provided context primarily discusses the prevalence and challenges of psoriatic arthritis (PsA) and psoriasis in women, noting that these conditions disproportionately affect women compared to men. However, it does not provide specific information about men or their experiences with these conditions. Therefore, I cannot provide a detailed answer regarding men based on the given context.


# Multi User Conversational RAG (Multi-User Chatbot)
***
* Store each user's chat history in a db
* Retrieve the existing history if a new chat with an existing user is started
* For each new user, create a unique identifier (uuid)

## Setup the SQLite Database

In [148]:
import sqlite3
from datetime import datetime
import uuid

In [149]:
DB_NAME = "rag_app.db"

In [160]:
# Connect to database
def connect_to_db(db_name):
    conn = sqlite3.connect(db_name)
    conn.row_factory = sqlite3.Row # Return rows as dictionaries
    return conn

# Create table to store logs
def create_application_logs(db_name):
    conn = connect_to_db(db_name)
    cursor = conn.cursor()

    # Use conn to interact with the db
    cursor.execute("""CREATE TABLE IF NOT EXISTS application_logs
                 (id INTEGER PRIMARY KEY AUTOINCREMENT,
                 session_id TEXT,
                 user_query TEXT,
                 model_response TEXT,
                 model TEXT,
                 created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)""")
    conn.commit()
    conn.close()
    return None

# Insert
def insert_application_logs(db_name, session_id, user_query, model_response, model):
    conn = connect_to_db(db_name)
    cursor = conn.cursor()

    cursor.execute("""INSERT INTO application_logs (session_id, user_query, model_response, model) VALUES
                 (?, ?, ?, ?)""", (session_id, user_query, model_response, model))
    conn.commit()
    conn.close()
    return None


# Retrieve
def get_chat_history(db_name, session_id):
    conn = connect_to_db(db_name)
    cursor = conn.cursor()

    result = cursor.execute("""SELECT user_query, model_response
                                from application_logs
                                where session_id= ?
                                ORDER BY created_at""", (session_id,))

    chat_history = []
    for row in result.fetchall():
        query = row['user_query']
        response = row['model_response']
        chat_history.extend([HumanMessage(query),
                            AIMessage(response)])
        
    conn.close()
    return chat_history

In [155]:
create_application_logs(DB_NAME)

In [162]:
# Based on session, retrieve chat history
session_id = str(uuid.uuid4())
query = "What does the Attention is all you need paper talk about?"
chat_history = get_chat_history(DB_NAME, session_id)
response = rag_chain.invoke({"input": query, "chat_history": chat_history})

insert_application_logs(DB_NAME, session_id, query, response['answer'], CHAT_MODEL)
print("Query: ", query)
print("Response: ", response['answer'])

Query:  What does the Attention is all you need paper talk about?
Response:  The "Attention Is All You Need" paper introduces the Transformer model architecture, which utilizes self-attention mechanisms to process sequences without relying on recurrence. It highlights the effectiveness of attention mechanisms in various tasks such as reading comprehension, abstractive summarization, and textual entailment. The paper emphasizes that attention allows for modeling dependencies in sequences regardless of their distance, and it presents a novel approach that moves away from traditional recurrent networks.


In [163]:
query = "Who introduced this idea?"
chat_history = get_chat_history(DB_NAME, session_id)
response = rag_chain.invoke({"input": query, "chat_history": chat_history})
insert_application_logs(DB_NAME, session_id, query, response['answer'], CHAT_MODEL) 

print("Query: ", query)
print("Response: ", response['answer'])

Query:  Who introduced this idea?
Response:  The idea of the Transformer models was introduced by Ashish Vaswani and Illia Polosukhin, who designed and implemented the first Transformer models. Noam Shazeer also played a significant role by proposing scaled dot-product attention, multi-head attention, and the parameter-free position representation.
