# Langchain

In [1]:
# Note: nomic-embed-text is broken in ollama

In [2]:
# Dependencies

# source /home/tom/WD/.venv/bin/activate && /home/tom/WD/.venv/bin/pip install jupyterlab-lsp python-lsp-server llama-index-core llama-index-readers-file llama-index-llms-ollama llama-index-embeddings-ollama unstructured langchain chromadb langchain-text-splitters google-cloud-vision google-cloud-storage deep_translator docx PyMuPDF llama-index-vector-stores-chroma torch torchvision torchtext langchainhub langchain-qdrant  langchain transformers accelerate sentence-transformers tensorflow langchain-community llama-index-vector-stores-qdrant

In [3]:
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_community.document_loaders import OnlinePDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [4]:
#note: pypdf also records title, page number etc. required by chain-5

from langchain_community.document_loaders import DirectoryLoader, PyMuPDFLoader

# Recursively load every PDF under ./docs with PyMuPDF.
# (An earlier revision of this cell used UnstructuredPDFLoader with an absolute path.)
loader = DirectoryLoader(
    'docs',
    glob="**/*.pdf",
    show_progress=True,
    loader_cls=PyMuPDFLoader,
)
repo_files = loader.load()


100%|███████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  7.52it/s]


In [5]:

# Report how many Document objects the loader returned.
# NOTE(review): the progress bar shows 2 files while this prints 11, so the
# count is presumably per page rather than per file - confirm.
print(f"Number of files loaded: {len(repo_files)}")

# Split the loaded documents into overlapping chunks for embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=150,
    chunk_size=1500,
)
documents = text_splitter.split_documents(repo_files)
print(f"Number of documents : {len(documents)}")

Number of files loaded: 11
Number of documents : 20


In [6]:
from langchain_community.embeddings import OllamaEmbeddings
#from langchain_community.vectorstores import Qdrant
from langchain_qdrant import Qdrant

In [7]:
# Embedding model served by Ollama; show_progress prints a progress bar per batch.
model_name = "mxbai-embed-large"
embeddings = OllamaEmbeddings(show_progress=True, model=model_name)

In [6]:
#Index new docs to db
# Embeds every chunk in `documents` and writes it to the on-disk Qdrant store
# at ./langchain_local_qdrant_pdf, collection "my_documents".
# NOTE(review): re-running this cell against an existing collection presumably
# adds the same chunks again - confirm, or recreate the collection first.
# NOTE(review): the local store appears to be locked by the client created
# here; opening the same path again in this kernel is expected to fail - confirm.

qdrant = Qdrant.from_documents(
 documents,
 embeddings,
 path="langchain_local_qdrant_pdf",
 collection_name="my_documents",
)

OllamaEmbeddings: 100%|█████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.61s/it]
OllamaEmbeddings: 100%|███████████████████████████████████████████████████| 63/63 [00:48<00:00,  1.29it/s]


In [8]:
#Restore db
#
# Re-open the persisted collection only when this kernel does not already hold
# a `qdrant` store. A local (path=...) Qdrant store is locked by the client
# that opened it, so constructing a second client on the same folder - e.g.
# right after the indexing cell has run - raises
# "Storage folder ... is already accessed by another instance of Qdrant client".
# With this guard the notebook survives Restart Kernel -> Run All whether or
# not the indexing cell was executed.

if "qdrant" not in globals():
    qdrant = Qdrant.from_existing_collection(
        embeddings,
        path="langchain_local_qdrant_pdf",
        collection_name="my_documents",
    )

In [9]:
def pretty_print_docs(documents):
    """Print each document's metadata, a dashed rule, then its text.

    Args:
        documents: Iterable of objects exposing ``metadata`` and
            ``page_content`` attributes.
    """
    rule = " - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - "
    for entry in documents:
        # One print call, newline-separated: same output as three prints.
        print(entry.metadata, rule, entry.page_content, sep="\n")


In [10]:
# Placeholder question; replace with the real query before running.
query = "question here?"
# Embeds the query (see the OllamaEmbeddings progress bar in the output) and
# returns the closest chunks from the Qdrant store.
found_docs = qdrant.similarity_search(query)
# Uncomment to inspect the retrieved chunks:
#pretty_print_docs(found_docs)

OllamaEmbeddings: 100%|█████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.41s/it]


In [11]:
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_core.prompts.chat import HumanMessagePromptTemplate

# LLM from Ollama
# NOTE(review): presumably requires the model to be available in the local
# Ollama instance - confirm before running.
local_model = "llama3.1"
llm = ChatOllama(model=local_model)


### Fast reply

In [12]:
from langchain import hub

# Pull the shared RAG prompt; it takes the variables `context` and `question`.
prompt = hub.pull("rlm/rag-prompt")
# For reference only, the pulled prompt looks like the text below. It used to
# sit here as a bare `Prompt: ChatPromptTemplate(...)` line, which Python
# treats as an annotation statement and evaluates, building a throwaway prompt
# object on every run.
#   ChatPromptTemplate(input_variables=['context', 'question'],
#     messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(
#       input_variables=['context', 'question'],
#       template="You are an assistant for question-answering tasks. Use the
#         following pieces of retrieved context to answer the question. If you
#         don't know the answer, just say that you don't know. Use three
#         sentences maximum and keep the answer concise.
#         \nQuestion: {question} \nContext: {context} \nAnswer:"))])

# prompt -> chat model -> plain string output.
chain = prompt | llm | StrOutputParser()
# Non-streaming alternative:
#response = chain.invoke({"question":query,"context":found_docs})
#print(response)

In [None]:

# Give the prompt plain text. Passing the list of Document objects directly
# would interpolate the list's repr (metadata, escaped newlines) into the
# {context} slot instead of the retrieved passages.
context_text = "\n\n".join(doc.page_content for doc in found_docs)

# Stream the answer, echoing each piece as it arrives and keeping the pieces
# in `chunks_answer` for later use.
chunks_answer = []
for chunk in chain.stream({"question": query, "context": context_text}):
    print(chunk, end='')
    chunks_answer.append(chunk)

### Chain Retrieval reply (Slower)

In [16]:
# Prompt handed to MultiQueryRetriever: asks the LLM for five rephrasings of
# the user's question, one per line.
# NOTE(review): the continuation lines of the template keep their four-space
# indent inside the string; presumably harmless to the LLM - confirm.
QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate five
    different versions of the given user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on the user question, your
    goal is to help the user overcome some of the limitations of the distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}""",
)

In [17]:
from langchain import hub



# Retriever that asks the LLM (via QUERY_PROMPT) for several rephrasings of the
# question and queries the Qdrant store with them.
retriever = MultiQueryRetriever.from_llm(
    qdrant.as_retriever(),
    llm,
    prompt=QUERY_PROMPT,
)

# RAG prompt
# NOTE: this `template` is not used by the chain built further down; a later
# cell reassigns `template` before ANSWER_PROMPT is created from it.
template = """Answer the question based ONLY on the following context:
{context}
Question: {question}
"""

# Same hub prompt as in the "Fast reply" section; the slower chain below
# answers with ANSWER_PROMPT instead.
prompt = hub.pull("rlm/rag-prompt")
# The pulled prompt's repr used to be pasted here as a bare
# `Prompt: ChatPromptTemplate(...)` line. Python treats that as an annotation
# statement and evaluates it, building a throwaway prompt object on every run,
# so it is kept as a comment instead:
#   ChatPromptTemplate(input_variables=['context', 'question'],
#     messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(
#       input_variables=['context', 'question'],
#       template="You are an assistant for question-answering tasks. Use the
#         following pieces of retrieved context to answer the question. If you
#         don't know the answer, just say that you don't know. Use three
#         sentences maximum and keep the answer concise.
#         \nQuestion: {question} \nContext: {context} \nAnswer:"))])


In [18]:
# Imports and setup for the conversational (memory-backed) chain
from langchain_core.runnables import RunnableLambda
from operator import itemgetter
from langchain.memory import ConversationBufferMemory
from typing import List, Tuple
from langchain.schema import format_document

# Initialise chat_history

chat_history = []


# Conversation memory: stores question/answer pairs as message objects and
# exposes them under the "chat_history" key.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    input_key="question",
    output_key="answer",
    return_messages=True,
)

# First chain step: pass the inputs through and set "chat_history" from
# whatever the memory currently holds.
loaded_memory = RunnablePassthrough.assign(
    chat_history=(
        RunnableLambda(memory.load_memory_variables)
        | itemgetter("chat_history")
    ),
)



# Define templates for prompts

# Condense prompt: rewrites a follow-up question plus the chat history into a
# single standalone question.
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

# Answer prompt: restricts the model to the retrieved context. This assignment
# replaces the `template` string defined in the retriever cell.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""


ANSWER_PROMPT = ChatPromptTemplate.from_template(template)

# Per-document prompt used by _combine_documents: renders only the page text.
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")


def _format_chat_history(chat_history: List[Tuple]) -> str:
    buffer = ""
    for dialogue_turn in chat_history:
        human = "HumanMessage: " + dialogue_turn[0]
        ai = "AIMessage: " + dialogue_turn[1]
        buffer += "\n" + "\n".join([human, ai])
    return buffer




def _combine_documents(
    docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
    """Format each document with ``document_prompt`` and join the results.

    Args:
        docs: Iterable of documents accepted by ``format_document``.
        document_prompt: Prompt used to render a single document.
        document_separator: String placed between rendered documents.

    Returns:
        The rendered documents joined by ``document_separator``.
    """
    rendered = []
    for item in docs:
        rendered.append(format_document(item, document_prompt))
    return document_separator.join(rendered)




# Step 1: condense the question and chat history into one standalone question.
standalone_question = {
    "standalone_question": (
        {
            "question": itemgetter("question"),
            "chat_history": itemgetter("chat_history"),
            # Alternative for tuple-based history:
            # "chat_history": lambda x: _format_chat_history(x["chat_history"]),
        }
        | CONDENSE_QUESTION_PROMPT
        | llm
        | StrOutputParser()
    ),
}

# Step 2: retrieve documents for the standalone question and carry it forward
# as "question".
retrieved_documents = {
    "docs": itemgetter("standalone_question") | retriever,
    "question": itemgetter("standalone_question"),
}

# Step 3: turn the retrieved documents into one context string for the prompt.
final_inputs = {
    "context": lambda x: _combine_documents(x["docs"]),
    "question": itemgetter("question"),
}

# Step 4: generate the answer and pass the source documents through with it.
answer = {
    "answer": final_inputs | ANSWER_PROMPT | llm,
    "docs": itemgetter("docs"),
}

# Full pipeline: memory -> standalone question -> retrieval -> answer.
final_chain = loaded_memory | standalone_question | retrieved_documents | answer

In [None]:
#stream chain 7
from langchain.schema.messages import HumanMessage, AIMessage

# Named `user_question` rather than `input` so the builtin input() is not
# shadowed for the rest of the kernel session.
user_question = """
Question Here
"""
# `loaded_memory` overwrites "chat_history" from `memory` when the chain runs;
# the key is still supplied so the input dict carries both fields.
inputs = {"question": user_question, "chat_history": chat_history}

# `chunks` keeps every streamed piece (answer fragments and the retrieved
# docs); `chunks_answer` keeps only the answer text fragments.
chunks = []
chunks_answer = []
for chunk in final_chain.stream(inputs):
    chunks.append(chunk)
    if 'answer' in chunk:
        piece = chunk['answer'].content
        print(piece, end='')
        chunks_answer.append(piece)

# The answer arrives as many small fragments. Join them into one string before
# storing it: saving the raw list would record a list of token fragments as
# the AI turn instead of the answer text.
answer_text = "".join(chunks_answer)

# Save the conversation turn in the running history and in memory.
chat_history.extend([
    HumanMessage(content=user_question),
    AIMessage(content=answer_text),
])
memory.save_context(inputs, {"answer": answer_text})

# Show the stored conversation (last expression, so the notebook displays it).
memory.load_memory_variables({})

In [None]:
#def extract_source_and_page(chunks):
#    for chunk in chunks:
#        if 'docs' in chunk:
#            for doc in chunk['docs']:
#                source = doc.metadata.get('source', 'Unknown Source')
#                page = doc.metadata.get('page', 'Unknown Page')
#                print(f"Source: {source}, Page: {page}")





def extract_source_and_page(chunks):
    """Print, per source file, the pages of the documents found in ``chunks``.

    Each chunk that has a ``'docs'`` entry contributes its documents; chunks
    without one are ignored. Pages are listed once per source, in the order
    they were first seen, so several text chunks from the same page do not
    repeat the page number.

    Args:
        chunks: Iterable of mappings; a ``'docs'`` value is an iterable of
            objects with a ``metadata`` dict. Missing ``'source'`` / ``'page'``
            keys fall back to ``'Unknown Source'`` / ``'Unknown Page'``.

    Returns:
        None. One ``Source: ..., Pages: ...`` line is printed per source.
    """
    # source -> insertion-ordered dict used as an ordered set of pages
    source_pages = {}

    for chunk in chunks:
        if 'docs' not in chunk:
            continue
        for doc in chunk['docs']:
            source = doc.metadata.get('source', 'Unknown Source')
            page = doc.metadata.get('page', 'Unknown Page')
            source_pages.setdefault(source, {})[page] = None

    for source, pages in source_pages.items():
        pages_str = ", ".join(map(str, pages))
        print(f"Source: {source}, Pages: {pages_str}")

extract_source_and_page(chunks)


In [None]:
chunks