## Ingesting PDF

In [None]:
# Install the PDF-parsing and LangChain packages this notebook imports.
# langchain-community is listed explicitly because the loaders, embeddings and
# vector store used below are all imported from `langchain_community`, which
# newer `langchain` releases do not install automatically; pymupdf backs the
# PyMuPDFLoader used by loading method 2.
%pip install -q unstructured langchain langchain-community pymupdf
%pip install -q "unstructured[all-docs]"

In [54]:
# Fetch the nomic-embed-text model with the Ollama CLI; it is the model name
# passed to OllamaEmbeddings in the vector-embedding cells.
!ollama pull nomic-embed-text

[?25lpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest ⠴ [?25h[?25l[2K[1Gpulling manifest ⠦ [?25h[?25l[2K[1Gpulling manifest ⠧ [?25h[?25l[2K[1Gpulling manifest ⠇ [?25h[?25l[2K[1Gpulling manifest ⠏ [?25h[?25l[2K[1Gpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest 
pulling 970aa74c0a90... 100% ▕████████████████▏ 274 MB                         
pulling c71d239df917... 100% ▕████████████████▏  11 KB                         
pulling ce4a164fc046... 100% ▕████████████████▏   17 B                         
pulling 31df23ea7daa... 100% ▕████████████████▏  420 B                         
verifying sha256 digest ⠋ [?25h[?25l[2K[1G[A[2K[1G[A[2K[1G[A

In [26]:
# Vector store (chromadb) and the text-splitter package, installed quietly in one call
%pip install -q chromadb langchain-text-splitters

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [55]:
# List the models known to the local Ollama install (llama3 and
# nomic-embed-text are the two this notebook refers to by name).
!ollama list

NAME                   	ID          	SIZE  	MODIFIED      
llama3:latest          	71a106a91016	4.7 GB	19 hours ago 	
llava:latest           	8dd30f6b0cb1	4.7 GB	3 hours ago  	
nomic-embed-text:latest	0a109f422b47	274 MB	9 seconds ago	


### Program Starts From Here

In [1]:
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_community.document_loaders import OnlinePDFLoader

In [None]:
# Use method 1 or method 2 below to load documents (run only one of the two cells)

In [None]:
# Method 1: load a single local PDF document
from pathlib import Path

local_path = "book2.pdf"

# Check that the file is really there. Testing the truthiness of the path
# string (as before) can never reach the "else" branch for a non-empty literal.
if Path(local_path).is_file():
  loader = UnstructuredPDFLoader(file_path=local_path)
  data = loader.load()
else:
  print("Upload a PDF file")

In [2]:
# Method 2: load every PDF under a folder that matches the "**/*.pdf" glob.
# Note: chain 5 reads doc.metadata['title']; per the original author's note this
# loader was chosen because it records title, page number, etc.

from langchain_community.document_loaders import DirectoryLoader, PyMuPDFLoader

# Alternative: pass loader_cls=UnstructuredPDFLoader to parse with unstructured instead.
loader = DirectoryLoader(
    '/home/tom/Python/Tools/RAG and PDF/docs',
    glob="**/*.pdf",
    loader_cls=PyMuPDFLoader,
    show_progress=True,
)
data = loader.load()


100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:04<00:00,  1.22it/s]


In [3]:
# Quick sanity check: show the text of the first loaded document
first_document = data[0]
first_document.page_content

'Technologies of Additive Manufacturing\n\nDr Matthias Hien\n\nLiterature:\n\n1. Gibson: Additive Manufacturing Technologies 2. https://www.engineersgarage.com/articles/3d-printing- processes-binder-jetting\n\n3. https://www.voxeljet.com/materialien/kunststoff/pmma-ppb- ppc-ppc2/\n\n4. https://www.youtube.com/watch?v=maO3XxB1imU 5. S. Nachum, J. Vogt, F. Raether : Additive Manufacturing of Ceramics: Stereolithography versus Binder Jetting\n\n6. C.Hauser:IMAGE TRANSFORMATIONS AND PRINTING OF PLASTER LAYERS IN SPIRAL GROWTH MANUFACTURING\n\nAdditive Manufacturing Technologies\n\nBinder Jetting\n\nBinder Jetting\n\n1. Introduction 2. Materials 3. Process Variations 4. Process Benefits and Drawback\n\nBinder Jetting\n\n1. Introduction\n\nBinder jetting is copyrighted by the name 3DP technology. It was developed at Massachusetts Institute of Technology (MIT) in 1993. Later on, the license of the technology was obtained by Z Corporation in 1995.\n\nhttps://www.engineersgar age.com/articles/3

## Vector Embeddings

In [3]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma

In [4]:
# Cut the loaded documents into overlapping pieces that will be embedded
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=100,
    chunk_size=7500,
)
chunks = text_splitter.split_documents(data)

In [None]:
#you can create a db or load from existing database

In [5]:
# Embed the chunks into the "local-rag" collection and persist it under
# ./chroma_db_pdf (run either this cell or the "load existing" cell below)
embedding_model = OllamaEmbeddings(model="nomic-embed-text", show_progress=True)
vector_db = Chroma.from_documents(
    documents=chunks,
    embedding=embedding_model,
    persist_directory="./chroma_db_pdf",
    collection_name="local-rag",
)

OllamaEmbeddings: 100%|████████████████████████████████████████████████████████████████| 11/11 [00:11<00:00,  1.04s/it]


In [6]:
# Re-open the "local-rag" collection stored under ./chroma_db_pdf instead of
# re-embedding (run either this cell or the "create" cell above)

embedding_model = OllamaEmbeddings(model="nomic-embed-text", show_progress=True)
vector_db = Chroma(
    collection_name="local-rag",
    embedding_function=embedding_model,
    persist_directory="./chroma_db_pdf",
)

In [7]:
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever

## Search relevant docs - Using Retrieval

In [19]:
# Retriever over the vector store: uses the "mmr" (maximal marginal relevance)
# search type and asks for k=8 results per query
mmr_search_options = {"k": 8}
retriever = vector_db.as_retriever(
    search_type="mmr",
    search_kwargs=mmr_search_options,
)

In [None]:
# Smoke-test the retriever with a fixed query and print each hit's text.
# Retrievers are Runnables, so `invoke` replaces the deprecated
# `get_relevant_documents` and matches how the chains below are called.
retrieved_relevant_docs = retriever.invoke("Binder Jetting")

for doc in retrieved_relevant_docs:
    print(doc.page_content)
    print('\n')

## Retrieval

In [10]:
# LLM from Ollama: chat model wrapper around the locally served "llama3" model.
# `llm` is shared by the multi-query retriever and by every chain below.
local_model = "llama3"
llm = ChatOllama(model=local_model)

In [11]:
# Prompt handed to MultiQueryRetriever: asks the LLM for five newline-separated
# rewordings of the user's question. {question} is the only template variable.
QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate five
    different versions of the given user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on the user question, your
    goal is to help the user overcome some of the limitations of the distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}""",
)

In [12]:
# Replace `retriever` with a multi-query retriever: the LLM rewrites the
# question using QUERY_PROMPT and a default vector-store retriever is queried.
# Note this rebinds the name assigned in the earlier "mmr" retriever cell.
retriever = MultiQueryRetriever.from_llm(
    retriever=vector_db.as_retriever(),
    llm=llm,
    prompt=QUERY_PROMPT,
)

# Prompt for the answering step (expects {context} and {question})
template = """Answer the question based ONLY on the following context:
{context}
Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template=template)

### Use chain of your choice

In [None]:
#chain1 - Original (maybe not good for multiple documents?)

In [43]:
# Chain 1: retrieve context for the raw question, fill the prompt, call the
# LLM and return the reply as a plain string
chain_inputs = {"context": retriever, "question": RunnablePassthrough()}
chain = chain_inputs | prompt | llm | StrOutputParser()

In [None]:
#Chain2: With history and Sources

In [9]:
from langchain.memory import ConversationBufferMemory

# Conversation buffer for chain 2: stored under "history", keyed on the
# "question" input, returned as message objects
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key="history",
    input_key="question",
)

In [13]:
# Prompt with conversation history; rebinds `prompt` and expects
# {context}, {history} and {question}
custom_prompt_template = """
### System:
You are an AI assistant that follows instructions extremely well. Help as much as you can.
### User:
You are a research assistant for an artificial intelligence student. Use only the following information to answer user queries:
Context= {context}
History = {history}
Question= {question}
### Assistant:
"""

prompt = PromptTemplate(
    input_variables=["question", "context", "history"],
    template=custom_prompt_template,
)

In [14]:
from langchain.chains import RetrievalQA

# Chain 2: RetrievalQA "stuff" chain that also returns its source documents
# and is given the custom prompt plus the conversation memory
stuff_chain_options = {
    "verbose": False,
    "prompt": prompt,
    "memory": memory,
}
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs=stuff_chain_options,
)

In [None]:
#Chain3: with sources

In [42]:
from langchain.chains import RetrievalQA

# Chain 3: RetrievalQA "stuff" chain with its built-in prompt; source
# documents are returned alongside the answer
qa_chain = RetrievalQA.from_chain_type(
    retriever=retriever,
    llm=llm,
    return_source_documents=True,
    chain_type="stuff",
)

In [None]:
#Chain4: Original chain (Chain1) modified to return sources

In [None]:
from langchain_core.runnables import RunnableParallel, RunnableLambda
from operator import itemgetter

def format_docs(docs):
    """Return the page_content of every document in `docs`, separated by blank lines."""
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)


from langchain.memory import ConversationBufferMemory

# Conversation buffer for chain 4 (stored under "history", keyed on "question")
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key="history",
    input_key="question",
)

# Reads the stored conversation and keeps only the "history" entry
load_history = RunnableLambda(memory.load_memory_variables) | itemgetter("history")

# Answer sub-chain: turn the retrieved documents into text, attach the
# history, then prompt -> LLM -> plain string
rag_chain_from_docs = (
    RunnablePassthrough.assign(
        context=lambda x: format_docs(x["context"]),
        history=load_history,
    )
    | prompt
    | llm
    | StrOutputParser()
)

# Full chain: retrieve documents next to the untouched question, then add the
# generated "answer" so the retrieved "context" stays in the output
retrieval_step = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
)
chain = retrieval_step.assign(answer=rag_chain_from_docs)

In [None]:
#Chain 5: 

In [None]:
from operator import itemgetter
from typing import List

from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import (
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)



from langchain.memory import ConversationBufferMemory 
# Conversation buffer (stored under "history", keyed on "question").
# NOTE(review): nothing else in this cell reads or writes `memory`; confirm
# whether chain 5 is meant to use it.
memory = ConversationBufferMemory(input_key="question",
                                   memory_key="history",
                                   return_messages=True)


# This chain can also be run with the simpler format_docs from chain 4
def format_docs(docs: List[Document]) -> str:
    """Convert Documents to a single string.

    Each document is rendered as an "Article Title" line followed by an
    "Article Snippet" line, and the rendered documents are joined by blank
    lines (the result also starts with one).

    Args:
        docs: Documents exposing ``metadata`` (a dict) and ``page_content``.

    Returns:
        The combined text; just the leading blank line when ``docs`` is empty.
    """

    def _title_of(doc) -> str:
        # Not every loader records a title: fall back to the source path and
        # then to a placeholder instead of raising KeyError mid-chain.
        metadata = doc.metadata
        return metadata.get('title') or metadata.get('source') or 'Untitled'

    formatted = [
        f"Article Title: {_title_of(doc)}\nArticle Snippet: {doc.page_content}"
        for doc in docs
    ]
    return "\n\n" + "\n\n".join(formatted)


# Pulls the retrieved documents out of the chain state and renders them as one
# string. Named `format_context` so the builtin `format` is not shadowed for
# the rest of the kernel session.
format_context = itemgetter("docs") | RunnableLambda(format_docs)
# subchain for generating an answer once we've done retrieval
answer = prompt | llm | StrOutputParser()
# complete chain: retrieve docs -> format docs to string -> run answer subchain
# -> return just the answer and the retrieved docs.
# ("memory" was dropped from the pick list: no step of this chain produces
# that key, so it could never appear in the output.)
chain = (
    RunnableParallel(question=RunnablePassthrough(), docs=retriever)
    .assign(context=format_context)
    .assign(answer=answer)
    .pick(["answer", "docs"]) # you can remove "docs" if not needed
)

In [None]:
#Chain6

In [None]:
# Define templates for prompts
from langchain_core.runnables import RunnableLambda
from operator import itemgetter
from langchain.memory import ConversationBufferMemory
from typing import List, Tuple
from langchain.schema import format_document

# Conversation buffer for chain 6 (no memory_key is passed, and the loader
# below reads the stored conversation back from the "history" entry)
memory = ConversationBufferMemory(
    input_key="question",
    output_key="answer",
    return_messages=True,
)

# First step of the chain: attach the stored conversation as "chat_history"
load_history = RunnableLambda(memory.load_memory_variables) | itemgetter("history")
loaded_memory = RunnablePassthrough.assign(chat_history=load_history)



# Define templates for prompts
# Condense prompt: rewrites a follow-up question into a standalone question
# using {chat_history} and {question}.
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

# Answer prompt: answers {question} from the retrieved {context}.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
ANSWER_PROMPT = ChatPromptTemplate.from_template(template)



# Per-document prompt used by _combine_documents: renders only the page text.
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")




def _combine_documents(
    docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
    """Render each document with `document_prompt` and join the results with `document_separator`."""
    rendered = (format_document(document, document_prompt) for document in docs)
    return document_separator.join(rendered)




# Step 1: condense the follow-up question and chat history into one standalone question
condense_inputs = {
    "question": itemgetter("question"),
    "chat_history": itemgetter("chat_history"),
}
standalone_question = {
    "standalone_question": condense_inputs
    | CONDENSE_QUESTION_PROMPT
    | llm
    | StrOutputParser(),
}

# Step 2: retrieve documents for the standalone question and carry it forward as "question"
retrieved_documents = {
    "docs": itemgetter("standalone_question") | retriever,
    "question": itemgetter("standalone_question"),
}

# Step 3: build the inputs of the answer prompt
final_inputs = {
    "context": lambda x: _combine_documents(x["docs"]),
    "question": lambda x: x["question"],
}

# Step 4: generate the answer (left as the LLM's message object) and keep the docs
answer_step = final_inputs | ANSWER_PROMPT | llm
answer = {
    "answer": answer_step,
    "docs": itemgetter("docs"),
}

# Create the final chain by combining the steps
final_chain = loaded_memory | standalone_question | retrieved_documents | answer


In [None]:
#Chain 7 (Best to use this one)

In [None]:
# Define templates for prompts
from langchain_core.runnables import RunnableLambda
from operator import itemgetter
from langchain.memory import ConversationBufferMemory
from typing import List, Tuple
from langchain.schema import format_document

# Initialise chat_history (the invoke/stream cells for chain 7 append to it)
chat_history = []

# Conversation buffer for chain 7, stored under the "chat_history" key
memory = ConversationBufferMemory(
    memory_key="chat_history",
    input_key="question",
    output_key="answer",
    return_messages=True,
)

# First step of the chain: attach the stored conversation as "chat_history"
load_chat_history = (
    RunnableLambda(memory.load_memory_variables) | itemgetter("chat_history")
)
loaded_memory = RunnablePassthrough.assign(chat_history=load_chat_history)



# Define templates for prompts
# Condense prompt: rewrites a follow-up question into a standalone question
# using {chat_history} and {question}.
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

# Answer prompt: answers {question} from the retrieved {context}.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""


ANSWER_PROMPT = ChatPromptTemplate.from_template(template)



# Per-document prompt used by _combine_documents: renders only the page text.
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")


def _format_chat_history(chat_history: List[Tuple]) -> str:
    buffer = ""
    for dialogue_turn in chat_history:
        human = "HumanMessage: " + dialogue_turn[0]
        ai = "AIMessage: " + dialogue_turn[1]
        buffer += "\n" + "\n".join([human, ai])
    return buffer




def _combine_documents(
    docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
    """Render each document with `document_prompt` and join the results with `document_separator`."""
    rendered = (format_document(document, document_prompt) for document in docs)
    return document_separator.join(rendered)




# Step 1: condense the follow-up question and chat history into one standalone
# question. The history is passed through as loaded from memory;
# _format_chat_history is available for a tuple-based history instead.
condense_inputs = {
    "question": itemgetter("question"),
    "chat_history": itemgetter("chat_history"),
}
standalone_question = {
    "standalone_question": condense_inputs
    | CONDENSE_QUESTION_PROMPT
    | llm
    | StrOutputParser(),
}

# Step 2: retrieve documents for the standalone question and carry it forward as "question"
retrieved_documents = {
    "docs": itemgetter("standalone_question") | retriever,
    "question": itemgetter("standalone_question"),
}

# Step 3: build the inputs of the answer prompt
final_inputs = {
    "context": lambda x: _combine_documents(x["docs"]),
    "question": lambda x: x["question"],
}

# Step 4: generate the answer (left as the LLM's message object) and keep the docs
answer_step = final_inputs | ANSWER_PROMPT | llm
answer = {
    "answer": answer_step,
    "docs": itemgetter("docs"),
}

# Create the final chain by combining the steps
final_chain = loaded_memory | standalone_question | retrieved_documents | answer

### Display results

In [None]:
#Chain1

# Type the question into the input box that appears and press enter
user_question = input("")
chain.invoke(user_question)

In [16]:
#Invoke Chain 2 and Chain 3

## Cite sources
def process_llm_response(llm_response):
    """Print the answer held under 'result', then the 'source' metadata of each source document."""
    answer_text = llm_response['result']
    print(answer_text)
    print('\n\nSources:')
    source_paths = (
        document.metadata['source'] for document in llm_response["source_documents"]
    )
    for source_path in source_paths:
        print(source_path)


# Ask for a question, run the QA chain, then print the answer with its sources
user_question = input("")
llm_response = qa_chain.invoke(user_question)
process_llm_response(llm_response)

 what would be its disadvantages?


OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.25s/it]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 16.59it/s]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  6.03it/s]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 16.06it/s]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  9.93it/s]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  9.85it/s]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 12.56it/s]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.10it/s]
OllamaEmbeddings: 100%|█████████████████

While Binder Jetting has many advantages, it also has some limitations and potential drawbacks. Some of the primary disadvantages include:

1. **Limited part resolution**: The layer thickness and powder size can limit the achievable part resolution, making it challenging to produce parts with small features or high-zirconia ceramic.
2. **Binder removal required**: After printing, the binder must be removed through a process like washing or solvent evaporation, which can be time-consuming and may not always be 100% effective.
3. **Part density and strength**: The part's density and strength might be compromised if the binding agent is not evenly distributed or if there are air pockets within the powder bed.
4. **Material limitations**: Although Binder Jetting allows for various metal and ceramic powders, some materials may not work well with the process or exhibit reduced properties compared to other AM methods.
5. **High energy consumption**: The printing process requires a significant

In [None]:
#Invoke Chain 4

# The chain 4 cell stores its runnable in `chain`; `rag_chain_with_source` is
# not defined anywhere in this notebook and raised NameError.
chain.invoke(input(""))

In [None]:
#Invoke chain 6

# Named `question` rather than `input`, so the builtin input() used by the
# other invoke cells keeps working in this kernel.
question = "what advantages does personas have?"
# Only the question is passed in: `loaded_memory` (first step of final_chain)
# adds "chat_history" from `memory`, and the `chat_history` list only exists
# once the chain 7 cell has been run.
inputs = {"question": question}
result = final_chain.invoke(inputs)
generated_answer = result['answer']

# Save the conversation in memory (the answer is a message object, so store its text)
memory.save_context(inputs, {"answer": generated_answer.content})

# Load memory to see the conversation history; kept as the last expression so
# the notebook actually displays it, including the turn just saved
memory.load_memory_variables({})


In [None]:
#Invoke chain 7

from langchain.schema.messages import HumanMessage, AIMessage

# Named `question` rather than `input`, so the builtin input() used by the
# other invoke cells keeps working in this kernel.
question = "what are their advantages?"
inputs = {"question": question, "chat_history": chat_history}
result = final_chain.invoke(inputs)
generated_answer = result['answer']

# Record the turn in the plain chat_history list
chat_history.extend([
    HumanMessage(content=question),
    AIMessage(content=generated_answer.content),
])

# Save the conversation in memory
memory.save_context(inputs, {"answer": generated_answer.content})

# Load memory to see the conversation history; kept as the last expression so
# the notebook actually displays it, including the turn just saved
memory.load_memory_variables({})




### Streaming results (might not retain context, unlike the invoke method; chains 6 and 7 do retain context)

In [None]:
# Note: streaming is not possible with RetrievalQA, which is used by chains 2 and 3

In [None]:
#Streaming-method1 (chain 1)

# Streamed pieces are collected under their own name so the document `chunks`
# produced by the text splitter (and consumed by Chroma.from_documents) are
# not overwritten when this cell runs.
streamed_chunks = []
for chunk in chain.stream("In 3D printing metal, what are the three major contributors to surface roughness"):
    streamed_chunks.append(chunk)
    print(chunk, end="", flush=True)
    

In [None]:
#Streaming-method2 (chain 1)

chunks = []
async for chunk in model.astream("In 3D printing metal, what are the three major contributors to surface roughness"):
    chunks.append(chunk)
    print(chunk.content, end="", flush=True)

In [None]:
#streaming method3 (chain 4 and 5)
# Streams a chain whose chunks are iterated key by key, merging the pieces per
# key into `output` while echoing them as they arrive.

output = {}
curr_key = None
for chunk in chain.stream("In 3D printing metal, what are the three major contributors to surface roughness"):
    for key in chunk:
        # First piece for a key is stored as-is; later pieces are added onto it
        if key not in output:
            output[key] = chunk[key]
        else:
            output[key] += chunk[key]
        # Print a "key:" header whenever the key changes, otherwise continue
        # the current line
        if key != curr_key:
            print(f"\n\n{key}: {chunk[key]}", end="", flush=True)
        else:
            print(chunk[key], end="", flush=True)
        curr_key = key
# Last expression: display the merged result
output

In [None]:
#stream chain 6

# Named `question` rather than `input`, so the builtin input() used by the
# invoke cells keeps working in this kernel.
question = "what advantages does personas have?"
# Only the question is passed in: `loaded_memory` (first step of final_chain)
# adds "chat_history" from `memory`, and the `chat_history` list only exists
# once the chain 7 cell has been run.
inputs = {"question": question}

# Print the answer as it streams; chunks without an "answer" key are only collected
streamed_chunks = []
for chunk in final_chain.stream(inputs):
    streamed_chunks.append(chunk)
    if 'answer' in chunk:
        print(chunk['answer'].content, end='')

# Rebuild the full answer text from the streamed pieces. The collected chunks
# form a list, so indexing it with 'answer' (as before) raised TypeError.
generated_answer = "".join(
    chunk['answer'].content for chunk in streamed_chunks if 'answer' in chunk
)

# Save the conversation in memory
memory.save_context(inputs, {"answer": generated_answer})

# Load memory to see the conversation history (displayed as the cell's result)
memory.load_memory_variables({})


In [None]:
#stream chain 7

from langchain.schema.messages import HumanMessage, AIMessage

# Named `question` rather than `input`, so the builtin input() used by the
# invoke cells keeps working in this kernel.
question = "what are their disadvantages?"
inputs = {"question": question, "chat_history": chat_history}

# Print the answer as it streams; chunks without an "answer" key are only collected
streamed_chunks = []
for chunk in final_chain.stream(inputs):
    streamed_chunks.append(chunk)
    if 'answer' in chunk:
        print(chunk['answer'].content, end='')

# Rebuild the full answer text from the streamed pieces (`chunks_answer` was
# used below without ever being assigned, which raised NameError).
chunks_answer = "".join(
    chunk['answer'].content for chunk in streamed_chunks if 'answer' in chunk
)

# Record the turn in the plain chat_history list
chat_history.extend([
    HumanMessage(content=question),
    AIMessage(content=chunks_answer),
])

# Save the conversation in memory
memory.save_context(inputs, {"answer": chunks_answer})

# Load memory to see the conversation history (displayed as the cell's result)
memory.load_memory_variables({})

In [16]:
# Delete the collection behind `vector_db` (the "local-rag" collection created
# or loaded above). Run this only when the stored embeddings are no longer
# needed: the retrievers and chains above are built on this vector store.
vector_db.delete_collection()