# Langchain

In [1]:
# Note: nomic-embed-text is broken in ollama

In [2]:
# Dependencies

# source /home/tom/WD/.venv/bin/activate && /home/tom/WD/.venv/bin/pip install jupyterlab-lsp python-lsp-server llama-index-core llama-index-readers-file llama-index-llms-ollama llama-index-embeddings-ollama unstructured langchain chromadb langchain-text-splitters google-cloud-vision google-cloud-storage deep_translator docx PyMuPDF llama-index-vector-stores-chroma torch torchvision torchtext langchainhub langchain-qdrant  langchain transformers accelerate sentence-transformers tensorflow langchain-community llama-index-vector-stores-qdrant

In [3]:
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_community.document_loaders import OnlinePDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [4]:
# Note: pypdf also records title, page number, etc., which chain-5 requires

from langchain_community.document_loaders import DirectoryLoader, PyMuPDFLoader

#loader = DirectoryLoader('/home/tom/Python/Tools/RAG and PDF/docs', glob="**/*.pdf", loader_cls=UnstructuredPDFLoader, show_progress=True)
# Collect every PDF below the relative "docs" folder (the glob recurses into
# sub-folders) and parse each one with PyMuPDFLoader.
loader = DirectoryLoader(
    'docs',
    glob="**/*.pdf",
    loader_cls=PyMuPDFLoader,
    show_progress=True,
)
repo_files = loader.load()


100%|███████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.38it/s]


In [5]:

# NOTE(review): the recorded output shows 13 here while the progress bar above shows a
# single PDF, so this presumably counts loaded Document objects (pages), not files.
print(f"Number of files loaded: {len(repo_files)}")

# Break the loaded documents into overlapping chunks for embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,
    chunk_overlap=150,
)
documents = text_splitter.split_documents(documents=repo_files)
print(f"Number of documents : {len(documents)}")

Number of files loaded: 13
Number of documents : 63


In [6]:
from langchain_ollama import OllamaEmbeddings
#from langchain_community.vectorstores import Qdrant
from langchain_qdrant import Qdrant

In [7]:
# Embedding model served by Ollama. The note at the top of the notebook records that
# nomic-embed-text is broken in ollama, which is presumably why this model is used.
model_name = "mxbai-embed-large"
embeddings = OllamaEmbeddings(model=model_name)

In [8]:
#Index new docs to db

# Embed every chunk in `documents` and store it in the "my_documents" collection of a
# local Qdrant store at the given path.
# NOTE(review): this cell writes to "langchain_local_qdrant_pdf_orig" while the
# "Restore db" cell opens "langchain_local_qdrant_pdf" — confirm the paths are meant to
# differ, otherwise a restore will not see what was indexed here.
qdrant = Qdrant.from_documents(
 documents,
 embeddings,
 path="langchain_local_qdrant_pdf_orig",
 collection_name="my_documents",
)

In [6]:
#Restore db

# Re-open a previously built collection instead of re-embedding the documents.
# NOTE(review): the path here ("langchain_local_qdrant_pdf") differs from the one the
# indexing cell writes to ("langchain_local_qdrant_pdf_orig") — confirm which store is
# intended. Both cells bind the same name `qdrant`, so only one should be run per session.
qdrant = Qdrant.from_existing_collection(
 embeddings,
 path="langchain_local_qdrant_pdf",
 collection_name="my_documents",
)

In [9]:
def pretty_print_docs(documents):
    """Print each document as three lines: its metadata, a dashed rule, its page content.

    Args:
        documents: iterable of objects exposing ``metadata`` and ``page_content``.
    """
    divider = " - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - "
    for item in documents:
        print(item.metadata)
        print(divider)
        print(item.page_content)


In [10]:
# Sample question used to probe the vector store. similarity_search is called without
# extra arguments, so the number of hits returned is the store's default.
query = "what is value network?"
found_docs = qdrant.similarity_search(query)
# Uncomment to inspect the retrieved chunks:
#pretty_print_docs(found_docs)

In [11]:
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_ollama import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_core.prompts.chat import HumanMessagePromptTemplate

# LLM from Ollama
# Chat model used by the LangChain chains below. The LlamaIndex section later rebinds
# `llm` to a different object, so chains built after that cell would pick up the new one.
local_model = "llama3.2"
llm = ChatOllama(model=local_model)


### Fast reply

In [10]:
from langchain import hub

# Pull the shared RAG prompt from the LangChain hub.
prompt = hub.pull("rlm/rag-prompt")
# For reference, the pulled prompt is a ChatPromptTemplate with input variables
# 'context' and 'question', wrapping one human message whose template reads:
#   "You are an assistant for question-answering tasks. Use the following pieces of
#    retrieved context to answer the question. If you don't know the answer, just say
#    that you don't know. Use three sentences maximum and keep the answer concise.
#    \nQuestion: {question} \nContext: {context} \nAnswer:"
# (Kept as a comment: written as a bare `Prompt: ...` annotation it only built and
# discarded a second prompt object without binding any name.)

# prompt -> chat model -> plain string
chain = prompt | llm | StrOutputParser()
# One-shot (non-streaming) alternative:
#response = chain.invoke({"question":query,"context":found_docs})
#print(response)

In [11]:

# Stream the answer piece by piece: echo each piece as it arrives and keep the pieces so
# the full answer can be reassembled afterwards.
# NOTE(review): `found_docs` is passed as the raw list returned by similarity_search, so
# the prompt presumably receives that list's string form — confirm this is intended
# rather than the joined page_content of the documents.
chunks_answer = []
for chunk in chain.stream({"question":query,"context":found_docs}):
    print(chunk, end='')
    chunks_answer.append(chunk)

According to the International Standards Organization, usability refers to "the extent to which a product can be used by specified users to achieve specified goals with effectiveness, efficiency and satisfaction in a specified context of use." It includes aspects such as usefulness, effectiveness (ease of use), learnability, and attitude (likeability).

### Chain retrieval reply (slower)

In [12]:
# Prompt handed to MultiQueryRetriever: asks the LLM for five newline-separated
# rephrasings of the user's question, each of which is then used for retrieval.
# The indentation inside the triple-quoted string is part of the prompt text.
QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate five
    different versions of the given user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on the user question, your
    goal is to help the user overcome some of the limitations of the distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}""",
)

In [13]:
from langchain import hub



# Retriever that asks the LLM (via QUERY_PROMPT) for several rephrasings of the question
# and queries the Qdrant store with each of them.
retriever = MultiQueryRetriever.from_llm(
    qdrant.as_retriever(),
    llm,
    prompt=QUERY_PROMPT
)

# RAG prompt
# NOTE(review): neither `template` nor `prompt` from this cell is used by the
# conversational chain built further down (that cell reassigns `template` and builds its
# own ANSWER_PROMPT) — confirm whether they are still needed.
template = """Answer the question based ONLY on the following context:
{context}
Question: {question}
"""

prompt = hub.pull("rlm/rag-prompt")
# For reference, the pulled prompt is a ChatPromptTemplate with input variables
# 'context' and 'question', wrapping one human message whose template reads:
#   "You are an assistant for question-answering tasks. Use the following pieces of
#    retrieved context to answer the question. If you don't know the answer, just say
#    that you don't know. Use three sentences maximum and keep the answer concise.
#    \nQuestion: {question} \nContext: {context} \nAnswer:"
# (Kept as a comment: written as a bare `Prompt: ...` annotation it only built and
# discarded a second prompt object without binding any name.)




In [14]:
# Define templates for prompts
from langchain_core.runnables import RunnableLambda
from operator import itemgetter
from langchain.memory import ConversationBufferMemory
from typing import List, Tuple
from langchain.schema import format_document

# Initialise chat_history

# Manual transcript of the conversation; the streaming cell appends to this list.
chat_history = []


# Create a memory instance
# The keys name the chain's input ("question"), its output ("answer") and the slot under
# which the history is returned ("chat_history"). return_messages=True presumably makes
# the memory return message objects rather than one formatted string — confirm.
# NOTE(review): the recorded cell output echoes this constructor line, which looks like
# a deprecation warning for ConversationBufferMemory — confirm against the installed version.
memory = ConversationBufferMemory(
    return_messages=True, output_key="answer", input_key="question", memory_key="chat_history"
)

# Define steps for the chain
# First chain step: sets the "chat_history" key of the chain input to whatever the
# memory currently holds (load_memory_variables -> pick "chat_history").
loaded_memory = RunnablePassthrough.assign(
    chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("chat_history"),
)



# Define templates for prompts
# Prompt that folds the chat history and a follow-up into one standalone question.
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

# Prompt for the final answer, restricted to the retrieved context.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""


ANSWER_PROMPT = ChatPromptTemplate.from_template(template)

# Each retrieved document contributes only its page_content to the context string.
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")


def _format_chat_history(chat_history: List[Tuple]) -> str:
    buffer = ""
    for dialogue_turn in chat_history:
        human = "HumanMessage: " + dialogue_turn[0]
        ai = "AIMessage: " + dialogue_turn[1]
        buffer += "\n" + "\n".join([human, ai])
    return buffer




def _combine_documents(
    docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
    """Format each document with ``document_prompt`` and join the results.

    Args:
        docs: iterable of documents accepted by ``format_document``.
        document_prompt: prompt used to render one document.
        document_separator: string placed between rendered documents.

    Returns:
        One string containing every rendered document.
    """
    return document_separator.join(
        format_document(entry, document_prompt) for entry in docs
    )




# get_buffer_string renders a list of chat messages as plain "Human: ... / AI: ..." text.
from langchain_core.messages import get_buffer_string


def _history_to_text(history):
    """Return the chat history as prompt-ready text.

    The memory is configured with return_messages=True, so the history arrives as a
    list of message objects; interpolating that list directly into the string prompt
    would show the LLM a Python list repr. A history that is already a string is
    passed through unchanged.
    """
    if isinstance(history, str):
        return history
    return get_buffer_string(history)


# Step 2: condense the follow-up question plus history into one standalone question.
standalone_question = {
    "standalone_question": {
        "question": lambda x: x["question"],
        "chat_history": lambda x: _history_to_text(x["chat_history"]),
    }
    | CONDENSE_QUESTION_PROMPT
    | llm
    | StrOutputParser(),
}

# Step 3: retrieve documents for the standalone question and carry the question along.
retrieved_documents = {
    "docs": itemgetter("standalone_question") | retriever,
    "question": lambda x: x["standalone_question"],
}

# Inputs for the answer prompt: joined document text plus the question.
final_inputs = {
    "context": lambda x: _combine_documents(x["docs"]),
    "question": itemgetter("question"),
}

# Step 4: produce the answer and pass the source documents through alongside it.
answer = {
    "answer": final_inputs | ANSWER_PROMPT | llm,
    "docs": itemgetter("docs"),
}

# Create the final chain by combining the steps
final_chain = loaded_memory | standalone_question | retrieved_documents | answer

  memory = ConversationBufferMemory(


In [17]:
#with new memory

# Define templates for prompts
from langchain_core.runnables import RunnableLambda
from operator import itemgetter
from langchain.memory import ConversationKGMemory
from typing import List, Tuple
from langchain.schema import format_document

# Initialise chat_history

# Manual transcript of the conversation; the streaming cell appends to this list.
# Re-running this cell resets it.
chat_history = []


# Create a memory instance
# Same keys as the buffer-memory variant of this cell, but ConversationKGMemory is also
# given the LLM. NOTE(review): it presumably uses the LLM to extract knowledge from each
# saved turn, which would add an LLM call per save — confirm.
memory = ConversationKGMemory(
    llm = llm, return_messages=True, output_key="answer", input_key="question", memory_key="chat_history"
)

# Define steps for the chain
# First chain step: sets the "chat_history" key of the chain input to whatever the
# memory currently holds (load_memory_variables -> pick "chat_history").
loaded_memory = RunnablePassthrough.assign(
    chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("chat_history"),
)



# Define templates for prompts
# Prompt that folds the chat history and a follow-up into one standalone question.
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

# Prompt for the final answer, restricted to the retrieved context.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""


ANSWER_PROMPT = ChatPromptTemplate.from_template(template)

# Each retrieved document contributes only its page_content to the context string.
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")


def _format_chat_history(chat_history: List[Tuple]) -> str:
    buffer = ""
    for dialogue_turn in chat_history:
        human = "HumanMessage: " + dialogue_turn[0]
        ai = "AIMessage: " + dialogue_turn[1]
        buffer += "\n" + "\n".join([human, ai])
    return buffer




def _combine_documents(
    docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
    """Format each document with ``document_prompt`` and join the results.

    Args:
        docs: iterable of documents accepted by ``format_document``.
        document_prompt: prompt used to render one document.
        document_separator: string placed between rendered documents.

    Returns:
        One string containing every rendered document.
    """
    return document_separator.join(
        format_document(entry, document_prompt) for entry in docs
    )




# get_buffer_string renders a list of chat messages as plain labelled text.
from langchain_core.messages import get_buffer_string


def _history_to_text(history):
    """Return the chat history as prompt-ready text.

    The memory is configured with return_messages=True, so the history arrives as a
    list of message objects; interpolating that list directly into the string prompt
    would show the LLM a Python list repr. A history that is already a string is
    passed through unchanged.
    """
    if isinstance(history, str):
        return history
    return get_buffer_string(history)


# Step 2: condense the follow-up question plus history into one standalone question.
standalone_question = {
    "standalone_question": {
        "question": lambda x: x["question"],
        "chat_history": lambda x: _history_to_text(x["chat_history"]),
    }
    | CONDENSE_QUESTION_PROMPT
    | llm
    | StrOutputParser(),
}

# Step 3: retrieve documents for the standalone question and carry the question along.
retrieved_documents = {
    "docs": itemgetter("standalone_question") | retriever,
    "question": lambda x: x["standalone_question"],
}

# Inputs for the answer prompt: joined document text plus the question.
final_inputs = {
    "context": lambda x: _combine_documents(x["docs"]),
    "question": itemgetter("question"),
}

# Step 4: produce the answer and pass the source documents through alongside it.
answer = {
    "answer": final_inputs | ANSWER_PROMPT | llm,
    "docs": itemgetter("docs"),
}

# Create the final chain by combining the steps
final_chain = loaded_memory | standalone_question | retrieved_documents | answer

In [None]:
#stream chain 7

# NOTE: `input` shadows the built-in of the same name; the name is kept so that any
# other cell referring to it keeps working.
input = """
whats the value network for?
"""
# "chat_history" is supplied for completeness; the chain's first step sets that key
# from `memory`.
inputs = {"question": input, "chat_history": chat_history}

# Stream the chain. Every raw chunk is kept (the "docs" chunks carry the sources used by
# extract_source_and_page); answer fragments are echoed as they arrive.
chunks = []
chunks_answer = []
for chunk in final_chain.stream(inputs):
    chunks.append(chunk)
    if 'answer' in chunk:
        print(chunk['answer'].content, end='')
        chunks_answer.append(chunk['answer'].content)

# Reassemble the streamed fragments into the complete answer. The history must store
# this single string; storing the fragment list would make later prompts show the
# answer as a Python list of tokens.
answer_text = "".join(chunks_answer)

from langchain.schema.messages import HumanMessage, AIMessage

# Record the turn in the manual transcript.
chat_history.extend([
    HumanMessage(content=input),
    AIMessage(content=answer_text),
])

# Record the turn in the chain's memory so the next question can refer back to it.
# To inspect what the memory holds: memory.load_memory_variables({})
memory.save_context(inputs, {"answer": answer_text})

In [None]:
#def extract_source_and_page(chunks):
#    for chunk in chunks:
#        if 'docs' in chunk:
#            for doc in chunk['docs']:
#                source = doc.metadata.get('source', 'Unknown Source')
#                page = doc.metadata.get('page', 'Unknown Page')
#                print(f"Source: {source}, Page: {page}")





def extract_source_and_page(chunks):
    """Print, per source file, the pages of the documents found in streamed chunks.

    Only chunks containing a 'docs' entry are considered. Pages are listed in the
    order encountered (repeats included); missing metadata falls back to
    'Unknown Source' / 'Unknown Page'.

    Args:
        chunks: iterable of chunk mappings as collected from the chain's stream.
    """
    pages_by_source = {}

    for chunk in chunks:
        if 'docs' not in chunk:
            continue
        for doc in chunk['docs']:
            origin = doc.metadata.get('source', 'Unknown Source')
            page_no = doc.metadata.get('page', 'Unknown Page')
            pages_by_source.setdefault(origin, []).append(page_no)

    for source, pages in pages_by_source.items():
        pages_str = ", ".join(str(page_no) for page_no in pages)
        print(f"Source: {source}, Pages: {pages_str}")

# Summarise the sources behind the most recent streamed answer; `chunks` is filled by
# the streaming cell, so that cell has to run first.
extract_source_and_page(chunks)


In [None]:
#chunks

# LLAMAINDEX

In [18]:
from llama_index.core import SimpleDirectoryReader
# Load everything in the relative "docs4" folder for the LlamaIndex pipeline.
# NOTE: this rebinds `documents`, which the LangChain section used for its split chunks.
documents = SimpleDirectoryReader("docs4").load_data()

In [19]:
#import ollama
from llama_index.llms.ollama import Ollama

# LlamaIndex LLM wrapper with a 300-second request timeout.
# NOTE: this rebinds `llm`, which the LangChain section bound to a ChatOllama model;
# LangChain cells re-run after this point would pick up this object instead.
llm = Ollama(model="llama3.1", request_timeout=300.0)
#llm = Ollama(model="llama3",base_url="http://192.168.1.232:11435") #llm = Ollama(model="llama2")

In [20]:
from llama_index.core import (
    SimpleDirectoryReader,
    VectorStoreIndex,
    Settings,
    ServiceContext,
    StorageContext,
    load_index_from_storage,
)

from llama_index.core.tools import QueryEngineTool, ToolMetadata

In [21]:
#embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
#from llama_index.embeddings.huggingface import HuggingFaceEmbedding


from llama_index.embeddings.ollama import OllamaEmbedding

# Embedding model for the LlamaIndex pipeline (same model name as the LangChain section).
# With base_url commented out the wrapper's default endpoint is used.
# NOTE(review): "mirostat": 0 is forwarded to Ollama as an extra option; it presumably
# disables Mirostat sampling — confirm it has any effect for an embedding model.
ollama_embedding = OllamaEmbedding(
    model_name="mxbai-embed-large",
    #base_url="http://localhost:11434",
    ollama_additional_kwargs={"mirostat": 0},
)

In [22]:
#set global parameters
from llama_index.core.node_parser import SentenceSplitter

# Global LlamaIndex defaults for the LLM, the embedding model and chunking.
Settings.llm = llm
Settings.embed_model = ollama_embedding
# Chunk size 512 with an overlap of 20, in whatever unit SentenceSplitter counts
# (presumably tokens — confirm).
Settings.node_parser = SentenceSplitter(chunk_size=512, chunk_overlap=20)
# Output budget and context window size assumed for the LLM.
Settings.num_output = 512
Settings.context_window = 3900


In [23]:
#Save to disc (Only execute this block if you have additional documents to be added to the database)

import qdrant_client
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import VectorStoreIndex


# Local Qdrant store (opened by path) that backs the LlamaIndex vector index.
client = qdrant_client.QdrantClient(path="./llamaindex_local_qdrant_pdf")
vector_store = QdrantVectorStore(client=client, collection_name="usability")
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Parse, embed and index every loaded document.
# NOTE(review): llm, node_parser, num_output and context_window are forwarded as extra
# keyword arguments; confirm from_documents honours them. The same values are already
# set globally on Settings.
index_usability = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    embed_model=ollama_embedding,
    llm=llm,
    node_parser=SentenceSplitter(chunk_size=512, chunk_overlap=20),
    num_output=512,
    context_window=3900,
    show_progress=True,
)

# Persist the index's storage context into the same directory as the Qdrant files.
index_usability.storage_context.persist(persist_dir="./llamaindex_local_qdrant_pdf")

  from .autonotebook import tqdm as notebook_tqdm
Parsing nodes: 100%|█████████████████████████████████████████████████████| 13/13 [00:00<00:00, 144.92it/s]
Generating embeddings: 100%|██████████████████████████████████████████████| 51/51 [00:50<00:00,  1.01it/s]


In [6]:
#Load from disc
import qdrant_client
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import VectorStoreIndex


# Re-open the local Qdrant store and the storage context persisted next to it.
client = qdrant_client.QdrantClient(path="./llamaindex_local_qdrant_pdf")
vector_store = QdrantVectorStore(client=client, collection_name="usability")
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
    persist_dir="./llamaindex_local_qdrant_pdf",
)

# Rebuild the index object on top of the existing vectors (nothing is re-embedded here).
# NOTE(review): llm, node_parser, num_output and context_window are forwarded as extra
# keyword arguments; confirm from_vector_store honours them. The same values are already
# set globally on Settings.
index_usability = VectorStoreIndex.from_vector_store(
    vector_store,
    storage_context=storage_context,
    embed_model=ollama_embedding,
    llm=llm,
    node_parser=SentenceSplitter(chunk_size=512, chunk_overlap=20),
    num_output=512,
    context_window=3900,
)

### Q and A

In [24]:
from llama_index.core import Prompt
from llama_index.core import PromptTemplate

# Question-answering prompt: retrieved context first, then the question.
# `PromptTemplate` here is the llama_index.core class imported at the top of this cell;
# it replaces the older `Prompt` alias so both templates are built the same way.
# NOTE: this rebinds `template`, which the LangChain section also used.
template = (
    "We have provided trusted context information below. \n"
    "---------------------\n"
    "{context_str}"
    "\n---------------------\n"
    "Given this trusted and cientific information, please answer the question: {query_str}. Remember that the statements of the context are verfied and come from trusted sources.\n"
)
qa_template = PromptTemplate(template)

# Refine prompt, used when the context does not fit into a single LLM call.
# Adjacent string literals are concatenated verbatim, so every piece ends with an
# explicit newline; without them the query, the existing answer, the delimiters and the
# context would run together into one unbroken line.
new_summary_tmpl_str = (
    "The original query is as follows: {query_str}\n"
    "We have provided an existing answer: {existing_answer}\n"
    "We have the opportunity to refine the existing answer (only if needed) with some more trusted context below. Remember that the statements of the context are verfied and come from trusted sources.\n"
    "------------\n"
    "{context_msg}\n"
    "------------\n"
    "Given the new trusted context, refine the original answer to better answer the query. If the context isn't useful, return the original answer. Remember that the statements of the new context are verfied and come from trusted sources.\n"
    "Refined Answer: sure thing! "
)
new_summary_tmpl = PromptTemplate(new_summary_tmpl_str)

In [25]:
from llama_index.core.retrievers import BaseRetriever, VectorIndexRetriever, KeywordTableSimpleRetriever
from llama_index.core import get_response_synthesizer
from llama_index.core.query_engine import RetrieverQueryEngine

# Retriever over the LlamaIndex vector index, returning the 4 most similar nodes.
# NOTE: this rebinds `retriever`, which the LangChain section bound to its
# MultiQueryRetriever.
retriever = VectorIndexRetriever(
    index=index_usability,
    similarity_top_k=4, # Change this to lower value for higher performance
)

# Synthesizer that turns retrieved nodes into an answer using the custom QA and refine
# prompts; streaming=True so the answer can be printed as it is generated.
response_synthesizer = get_response_synthesizer( ##try compact?
    text_qa_template=qa_template,
    streaming=True,
    refine_template=new_summary_tmpl
)
# Query engine = retriever + synthesizer. The commented-out postprocessor would drop
# nodes below a similarity cutoff of 0.7.
query_engine3 = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
    # node_postprocessors=[
    #     SimilarityPostprocessor(similarity_cutoff=0.7)
    # ]
)

In [26]:
# Run a one-off question through the streaming query engine; the answer text is
# consumed by the following cell.
response = query_engine3.query("Explain value network?")

In [27]:
response.print_response_stream()

Based on the provided context, a Value Network is a type of deep neural network architecture used to estimate values for unexplored states in a problem-solving setting. It is trained end-to-end with backpropagation and designed to provide predictions in real-time.

In this specific study, the Value Network is used as an alternative to model-based simulations (MCTS) to estimate rewards or values for each state. The Value Network is pre-trained on data generated using MCTS roll-outs from randomly sampled initial states. It takes into account design features and user information to predict value estimates for any given state.

The Value Network architecture consists of multiple input branches, which are treated as independent model branches that are eventually concatenated and passed to multiple output branches (tails). This m-headed n-tailed architecture is trained using a combination of model-based data generated from MCTS roll-outs and other training data.

By utilizing the Value Netwo

In [None]:
print(response)

### Chat

In [9]:
from llama_index.core import PromptTemplate
from llama_index.core.llms import ChatMessage, MessageRole
from llama_index.core.chat_engine import CondenseQuestionChatEngine

# Optional prompt for condensing chat history plus follow-up into a standalone question.
# It is only used if the commented-out `condense_question_prompt` argument below is enabled.
custom_prompt = PromptTemplate(
    """\
Given a conversation (between Human and Assistant) and a follow up message from Human, \
rewrite the message to be a standalone question that captures all relevant context \
from the conversation.

<Chat History>
{chat_history}

<Follow Up Message>
{question}

<Standalone question>
"""
)

# list of `ChatMessage` objects
# Sample seed history; only used if the commented-out `chat_history` argument below is
# enabled.
custom_chat_history = [
    ChatMessage(
        role=MessageRole.USER,
        content="Hello assistant, we are having a insightful discussion about Additive Manufacturing today.",
    ),
    ChatMessage(role=MessageRole.ASSISTANT, content="Okay, sounds good."),
]

# Chat engine on top of the Q&A query engine, with the library's default condense prompt
# and an empty history (the custom ones above are commented out).
#query_engine = index_usability.as_query_engine()
chat_engine = CondenseQuestionChatEngine.from_defaults(
    query_engine=query_engine3,
    streaming=True,
    #condense_question_prompt=custom_prompt,
    #chat_history=custom_chat_history,
    #verbose=True,
)

In [10]:
# Ask the chat engine a question in streaming mode.
# NOTE: this rebinds `response`, previously the Q&A query result.
response = chat_engine.stream_chat("what all is composed in value network?")

In [None]:
response.print_response_stream()

In [16]:
from IPython.display import Markdown, display
# Render the answer as Markdown wrapped in <b> tags; the response is interpolated via
# its string form.
display(Markdown(f"<b>{response}</b>"))

<b>According to the provided text, the following components are composed in a Value Network:

1. **Input Branches (Model) Input Design Features**: These refer to the design features used as input for the Value Network.
2. **User Features**: These are the user-related features that serve as inputs for the Value Network.
3. **Output Branches (Model) Value Predictions**: The output of the Value Network, which consists of predicted value estimates based on the input design and user features.

In essence, a Value Network in this context takes in both design and user features as inputs and generates value predictions or estimates as outputs.</b>

### Sources available only in Q and A

In [11]:
response.metadata

{'afe201a8-0d02-4683-868c-2b6b44405f65': {'page_label': '10',
  'file_name': 'todi2021.pdf',
  'file_path': '/home/tom/WD/Tools/RAG and PDF/Ollama/test/docs/todi2021.pdf',
  'file_type': 'application/pdf',
  'file_size': 5440821,
  'creation_date': '2024-07-25',
  'last_modified_date': '2023-11-09'},
 '3a000054-5a30-43d7-9e98-d472cfdd5024': {'page_label': '5',
  'file_name': 'todi2021.pdf',
  'file_path': '/home/tom/WD/Tools/RAG and PDF/Ollama/test/docs/todi2021.pdf',
  'file_type': 'application/pdf',
  'file_size': 5440821,
  'creation_date': '2024-07-25',
  'last_modified_date': '2023-11-09'},
 'a502f251-dcfa-463d-8f77-785e00f6ec9d': {'page_label': '12',
  'file_name': 'todi2021.pdf',
  'file_path': '/home/tom/WD/Tools/RAG and PDF/Ollama/test/docs/todi2021.pdf',
  'file_type': 'application/pdf',
  'file_size': 5440821,
  'creation_date': '2024-07-25',
  'last_modified_date': '2023-11-09'},
 'e9a4bb05-0595-4af1-85fb-eb0bceb4f058': {'page_label': '9',
  'file_name': 'todi2021.pdf',
  '

In [12]:
import re
# Print one "page_label / file_name" entry per source node behind the answer.
# The metadata mapping (node id -> metadata dict) is read directly instead of
# regex-matching its repr, which only worked while 'page_label' was immediately followed
# by 'file_name' and neither value needed different quoting.
if hasattr(response, 'metadata'):
    # metadata may be None when no source nodes are attached
    node_metadata = response.metadata or {}
    find = [
        f"'page_label': '{meta['page_label']}', 'file_name': '{meta['file_name']}'"
        for meta in node_metadata.values()
        if isinstance(meta, dict) and 'page_label' in meta and 'file_name' in meta
    ]

    print('\n'+'=' * 60+'\n')
    print('Context Information')
    print(str(find))
    print('\n'+'=' * 60+'\n')



Context Information
["'page_label': '10', 'file_name': 'todi2021.pdf'", "'page_label': '5', 'file_name': 'todi2021.pdf'", "'page_label': '12', 'file_name': 'todi2021.pdf'", "'page_label': '9', 'file_name': 'todi2021.pdf'"]


