In [None]:
import re

from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.retrievers import BM25Retriever
from langchain_classic.retrievers import EnsembleRetriever
from langchain_classic.schema import Document
from langchain_classic.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_classic.chains.combine_documents import create_stuff_documents_chain
from langchain_classic.chains.retrieval import create_retrieval_chain
from IPython.display import Markdown, display

In [44]:
# Read the source PDF; `docs` holds whatever the loader returns for the file
loader = PyPDFLoader(file_path="../Surface_Treatment_of_Metals.pdf")
docs = loader.load()

In [45]:
# Embedding model used to vectorise the chunks for the dense (FAISS) index
embeddin_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

In [58]:
# Chunking configuration: chunks of up to 1000 characters with a 100-character overlap
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)

# Split freshly loaded pages instead of the current value of `docs`.
# The previous form, `docs = splitter.split_documents(docs)`, fed the cell's own
# output back into itself: after one run with a smaller chunk_size, every later
# run only re-split those small chunks, so the chunks no longer reflected the
# settings above. Reloading from `loader` makes the cell give the same result
# no matter how many times (or in what order) it is executed.
docs = splitter.split_documents(loader.load())

### Hybrid (Sparse + Dense) Retrieval Technique

In [59]:
# Dense side: index the chunk embeddings in FAISS, then wrap the index as a retriever
dense_vector_store = FAISS.from_documents(documents=docs, embedding=embeddin_model)
dense_retriever = dense_vector_store.as_retriever()

In [80]:
# Sparse side: keyword-based BM25 retriever built over the same chunks
sparse_retriever = BM25Retriever.from_documents(documents=docs)
sparse_retriever.k = 9  # how many documents BM25 hands back per query

In [81]:
# Merge the dense and sparse retrievers into a single hybrid retriever
hybrid_reetriever = EnsembleRetriever(
    retrievers=[dense_retriever, sparse_retriever],
    weights=[0.7, 0.3],  # same order as `retrievers`: dense 0.7, sparse 0.3
)

In [82]:
def hybrid_search(query):
    """Run `query` through the hybrid retriever and print every hit.

    Each returned document's `page_content` is printed under a 1-based
    "Document N" heading. The function prints only; it returns nothing.
    """
    for position, doc in enumerate(hybrid_reetriever.invoke(query), start=1):
        print(f"\n üîπDocument {position} :\n{doc.page_content}")

In [83]:
# Smoke-test the hybrid retriever with a sample query
hybrid_search(query="Metal surface treatment")


 üîπDocument 1 :
cutting-edge research in the field of surface treatment for metals.

 üîπDocument 2 :
knowledge of the surface treatment of metals.

 üîπDocument 3 :
Surface Treatment of Metals
Petrica Vizureanu 1,2

 üîπDocument 4 :
Citation: Vizureanu, P . Surface
Treatment of Metals. Coatings 2022,

 üîπDocument 5 :
One study demonstrated an interesting approach to diffusion surface treatment fol-

 üîπDocument 6 :
Nowadays, many surface treatment technologies are available in addition to advanced

 üîπDocument 7 :
lowed by a laser heat treatment (LHT) procedure [ 7]. Low-alloy and medium-carbon

 üîπDocument 8 :
ageing for surface finish applications was performed. The results show that the addition

 üîπDocument 9 :
study [3] highlighted a methodology to design the best contact profile of the surface for

 üîπDocument 10 :
Blocking at the Metal–Mould Interface. Coatings 2020, 10, 680. [CrossRef]

 üîπDocument 11 :
and Mechanical Properties. Coatings 2020, 10, 824

### Re-Rank Technique

In [84]:
from langchain_groq import ChatGroq
from langchain_core.prompts import PromptTemplate , ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Chat model shared by the re-ranking chain and the RAG chain
llm = ChatGroq(
    model="openai/gpt-oss-120b",
    temperature=0,
)

In [85]:
# Prompt for LLM-based re-ranking. It takes {question} and {documents}.
# The reply is parsed as a comma-separated list of integers and used to index
# the retrieved list from 0, so the prompt states the zero-based numbering and
# asks for the indices only. The example no longer ends in "..." so the model
# is not nudged into echoing it.
rerank_prompt_template = PromptTemplate.from_template(
"""You are a helpful assistant. Your task is to rank the following documents from most to least relevant to the question.

User Question : {question}

Documents : {documents}

Instructions :
- Think about the relevance of each document to the user's question.
- Documents are numbered from 0 in the order they appear above.
- Return every document index exactly once, in ranked order, starting from the most relevant.
- Return only the indices: no explanations, labels, or any other text.

Output format : comma-separated document indices (e.g., 2,0,3,1)
"""
)

In [86]:
# Question used by the retrieval, re-ranking and RAG cells that follow
query = "What is the metal surface treatment"

In [87]:
# Candidate documents to re-rank; any retriever can be used here
retrieved_docs = dense_retriever.invoke(input=query)
retrieved_docs

[Document(id='4cc37b00-9801-4697-b0dc-77404d15362a', metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2022-04-21T20:10:03+08:00', 'author': 'Petrica Vizureanu', 'keywords': '', 'moddate': '2022-04-22T02:33:21+02:00', 'subject': '', 'title': 'Surface Treatment of Metals', 'source': '../Surface_Treatment_of_Metals.pdf', 'total_pages': 2, 'page': 1, 'page_label': '2'}, page_content='knowledge of the surface treatment of metals.'),
 Document(id='be471388-ac72-4094-9234-49797319572b', metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2022-04-21T20:10:03+08:00', 'author': 'Petrica Vizureanu', 'keywords': '', 'moddate': '2022-04-22T02:33:21+02:00', 'subject': '', 'title': 'Surface Treatment of Metals', 'source': '../Surface_Treatment_of_Metals.pdf', 'total_pages': 2, 'page': 0, 'page_label': '1'}, page_content='cutting-edge research in the Ô¨Åeld of surface treatment for metals.'),
 Document(id='2d3fd9e2-a0ce-4

In [88]:
# Re-ranking pipeline: fill the prompt, call the LLM, return the reply as a plain string
chain = (
    rerank_prompt_template
    | llm
    | StrOutputParser()
)

In [89]:
# Give the LLM only each chunk's text, labelled with its zero-based position in
# `retrieved_docs`. Passing the raw list would insert the Document reprs
# (ids and metadata included) into the prompt with no explicit index for the
# model to refer back to.
numbered_docs = "\n\n".join(
    f"[{idx}] {doc.page_content}" for idx, doc in enumerate(retrieved_docs)
)
res = chain.invoke({"question": query, "documents": numbered_docs})

In [90]:
# Turn the LLM reply into a list of integer indices.
# - re.findall pulls out the numbers even when the reply carries brackets,
#   labels or other stray text, where int() on each comma-separated piece
#   would raise ValueError.
# - dict.fromkeys drops repeated indices while keeping first-seen order.
# - The isinstance guard keeps this cell re-runnable: after the first run
#   `res` is already a list, so there is nothing left to parse.
if isinstance(res, str):
    res = list(dict.fromkeys(int(tok) for tok in re.findall(r"-?\d+", res)))

res

[0, 1, 2, 3]

In [91]:
# Reorder the retrieved documents by the LLM's ranking, ignoring out-of-range indices
valid_positions = range(len(retrieved_docs))
reranked_docs = [retrieved_docs[idx] for idx in res if idx in valid_positions]
reranked_docs

[Document(id='4cc37b00-9801-4697-b0dc-77404d15362a', metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2022-04-21T20:10:03+08:00', 'author': 'Petrica Vizureanu', 'keywords': '', 'moddate': '2022-04-22T02:33:21+02:00', 'subject': '', 'title': 'Surface Treatment of Metals', 'source': '../Surface_Treatment_of_Metals.pdf', 'total_pages': 2, 'page': 1, 'page_label': '2'}, page_content='knowledge of the surface treatment of metals.'),
 Document(id='be471388-ac72-4094-9234-49797319572b', metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2022-04-21T20:10:03+08:00', 'author': 'Petrica Vizureanu', 'keywords': '', 'moddate': '2022-04-22T02:33:21+02:00', 'subject': '', 'title': 'Surface Treatment of Metals', 'source': '../Surface_Treatment_of_Metals.pdf', 'total_pages': 2, 'page': 0, 'page_label': '1'}, page_content='cutting-edge research in the Ô¨Åeld of surface treatment for metals.'),
 Document(id='2d3fd9e2-a0ce-4

### MMR (Maximal Marginal Relevance)

In [113]:
# Retriever over the same FAISS index, using MMR search and returning 4 documents
mmr_retriever = dense_vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 4})

In [114]:
# Answer-generation prompt with two placeholders: {context} and {input}
prompt = PromptTemplate.from_template(
    template="""Answer the question based on the context provided.

    Context : {context}

    Question : {input} 
    """
)

In [115]:
# Step 1: chain that passes documents and the prompt to the LLM
doc_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Step 2: put the MMR retriever in front of that chain
rag_chain = create_retrieval_chain(
    retriever=mmr_retriever,
    combine_docs_chain=doc_chain,
)

In [116]:
# Run the RAG chain on the question; the generated text is read from response["answer"] below
response = rag_chain.invoke({"input": query})

In [117]:
# Render the answer as Markdown. display() already emits the output and returns
# None, so wrapping it in print() only added a stray "None" line below the answer.
display(Markdown(response["answer"]))

The metal surface is being **anodically oxidised (anodised) – i.e., an anodic oxidation / anodizing treatment of the titanium substrate**. This surface-treatment creates a hard, tightly-bonded oxide layer that serves as a highly adherent coating, improving the metal's behaviour in the mould-metal interface.

None
