## Importing Libraries

In [1]:
from langchain_ollama import ChatOllama

from langchain.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

from langchain_ollama import OllamaEmbeddings
from langchain_chroma import Chroma
from langchain_community.retrievers import BM25Retriever
from langchain.retrievers import EnsembleRetriever 

from langchain_core.prompts import MessagesPlaceholder
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_history_aware_retriever
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

from langchain_core.tools import tool
import datetime

import warnings
import logging


In [2]:

# Suppress deprecation warnings only. LangChain's deprecation warnings derive
# from DeprecationWarning / PendingDeprecationWarning, so these two filters
# silence them without hiding unrelated warning categories (e.g. UserWarning).
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=PendingDeprecationWarning)

# Suppress httpx logging below WARNING (hides per-request INFO lines)
logging.getLogger("httpx").setLevel(logging.WARNING)

## Model

In [3]:
# Initialize the Ollama chat model. `model` is the LLM handed to the
# history-aware retriever and the question-answering chain further down.
# Note: a later cell ("Function Calling") rebinds the name `model`.
model = ChatOllama(model="llama3.2")

### Document Loading and Splitting

In [4]:
# Load "leave.pdf" (relative path); `docs` holds whatever the loader returns
# and is the input to the text splitter in the next cell.
loader = PyPDFLoader("leave.pdf")
docs = loader.load()

In [5]:
# Split the loaded documents into overlapping chunks (chunk_size=300,
# chunk_overlap=50). `all_splits` feeds both the vector store and the BM25 index.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
all_splits = text_splitter.split_documents(docs)

### Embedding

In [6]:
# Embedding function for the Chroma vector store; uses the same "llama3.2"
# model name as the chat model above.
embeddings = OllamaEmbeddings(model="llama3.2")

### Vector Search and BM25: Hybrid Retriever

In [7]:
# Build the vector store and index every chunk with the embedding function.
# NOTE(review): re-running this cell calls add_documents again on a fresh
# Chroma(...) instance; whether that duplicates entries depends on how the
# underlying collection is shared -- confirm before re-running.
vector_store = Chroma(embedding_function=embeddings)
_ = vector_store.add_documents(documents=all_splits)

# Dense retriever returning the 5 most similar chunks. The keyword for the
# search strategy is `search_type` (the previous `type=` was not the
# documented parameter name).
vector_retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)

In [8]:
# Keyword-based (BM25) retriever built over the same chunks as the vector store.
bm25_retriever = BM25Retriever.from_documents(all_splits)
# `k` is set to 5, matching the "k" used in the vector retriever's search_kwargs.
bm25_retriever.k = 5

In [9]:
# Hybrid retriever: combines the BM25 and vector retrievers; the weights are
# listed in the same order as the retrievers (0.4 for BM25, 0.6 for vector).
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, vector_retriever],
    weights=[0.4, 0.6],
)

In [10]:
# Smoke test of the hybrid retriever. Retrievers are Runnables, so `invoke`
# is the supported entry point (`get_relevant_documents` is deprecated).
ensemble_retriever.invoke("hello")

[Document(metadata={'source': 'leave.pdf', 'page': 2, 'page_label': '3'}, page_content='*Note: The company reserves the right tochange theleavepolicy.Anyremaining/unusedannualleaveand/or sick leave shall be adjustedas leaves takenonSundaysastwo-dayweekendisanaddedfacilityprovidedbyBajraandnotmandatedbylaw.'),
 Document(id='2e3bc9dc-7cd6-4d3f-84b2-79445c6911e2', metadata={'page': 2, 'page_label': '3', 'source': 'leave.pdf'}, page_content='18.Anemployeemustassurethatatleast75%oftheteammembersarepresentduringaliveproject.Forexample,iftherearefourmembersinateam,atleast¾oftheprojectmembersshouldbeavailable;youmaynottakealeavedayifoneoftheteammembersisalreadyonleave.However,'),
 Document(id='8a3877b9-8bc7-4a96-9182-4d4da6b08acc', metadata={'page': 1, 'page_label': '2', 'source': 'leave.pdf'}, page_content='FloatingFestiveleaves- 4days(Youcanchoosebetweenanyfestival/publicholidaysyoucelebrate.ThenumberofFloatingleavedependsonthenumberofDashainandTiharholidayssincetheleavesallocatedmaychangeac

### Reranking

In [11]:
from typing import Callable, List

from langchain.schema import BaseRetriever, Document
from pydantic import BaseModel
from sentence_transformers import CrossEncoder

In [12]:
# Initialize the Cross-Encoder model
# Created once at module level and shared by rerank_docs below.
cross_encoder = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

def rerank_docs(query, docs, top_k=10):
    """Rerank documents for a query with the module-level cross-encoder.

    Args:
        query: The query string each document is scored against.
        docs: Iterable of documents exposing a ``page_content`` attribute.
        top_k: Maximum number of documents to return.

    Returns:
        The documents ordered by descending cross-encoder score, truncated to
        ``top_k``. Documents with equal scores keep their incoming order.
        An empty input yields an empty list without calling the model.
    """
    # Materialize once so generators/iterators can be traversed twice.
    docs = list(docs)
    if not docs:
        return []

    # Create query-document pairs for scoring
    query_doc_pairs = [(query, doc.page_content) for doc in docs]

    # Get scores from the Cross-Encoder
    scores = cross_encoder.predict(query_doc_pairs)

    # Sort on the score only. Sorting the raw (score, doc) tuples would fall
    # back to comparing the documents themselves whenever two scores tie,
    # which raises TypeError because documents are not orderable.
    ranked = sorted(zip(scores, docs), key=lambda pair: pair[0], reverse=True)

    # Return the top-k reranked documents
    return [doc for _, doc in ranked[:top_k]]

In [13]:

# Define the Cross-Encoder reranking retriever as a Pydantic model
class CrossEncoderRerankingRetriever(BaseRetriever, BaseModel):
    """Retriever that reranks the results of another retriever.

    Candidates are fetched from ``base_retriever`` and then passed, together
    with the query, to ``reranker``, which returns the final ordered list.

    NOTE(review): this class overrides the public ``get_relevant_documents``
    method; newer langchain-core versions expect subclasses to implement
    ``_get_relevant_documents`` instead -- confirm against the installed version.
    """

    # Retriever that supplies the candidate documents.
    base_retriever: BaseRetriever
    # Called as ``reranker(query, docs, top_k=...)`` and expected to return the
    # reranked documents. Annotated with typing.Callable because the builtin
    # ``callable`` is a function, not a type, and gives pydantic nothing to validate.
    reranker: Callable[..., List[Document]]

    def get_relevant_documents(self, query: str, *, top_k: int = 10) -> List[Document]:
        """Return up to ``top_k`` documents for ``query`` after reranking.

        Args:
            query: The search query.
            top_k: Number of reranked documents to return (keyword-only).

        Returns:
            The list produced by ``self.reranker``.
        """
        # Retrieve candidates through the Runnable entry point (`invoke`)
        # rather than the deprecated `get_relevant_documents`.
        # NOTE(review): `top_k=20` is forwarded as an extra keyword argument;
        # whether the base retriever honours it depends on its implementation,
        # so the candidate pool may not actually be 20 -- confirm.
        retrieved_docs = self.base_retriever.invoke(query, top_k=20)

        # Rerank the retrieved documents and keep the requested number.
        return self.reranker(query, retrieved_docs, top_k=top_k)

In [14]:
# Create the reranking retriever: candidates come from the hybrid
# (BM25 + vector) ensemble and are reordered by rerank_docs.
cross_encoder_retriever = CrossEncoderRerankingRetriever(
    base_retriever=ensemble_retriever,
    reranker=rerank_docs
)

In [15]:
# Smoke test: request the top 3 reranked documents for a sample query.
cross_encoder_retriever.get_relevant_documents("hello", top_k=3)

[Document(metadata={'source': 'leave.pdf', 'page': 0, 'page_label': '1'}, page_content='3. Atotalof26workingdays(or208workinghours) inayearisthemaximumleaveanemployeecanearnwhichisequivalentto12daysofsickleaveand14daysofannualleave.\n4. Annualleave:AnemployeerequestingAnnualleaveforupto3daysneedstoapply5workingdayspriortothedesiredleaveday/s.'),
 Document(metadata={'source': 'leave.pdf', 'page': 0, 'page_label': '1'}, page_content='IfanemployeehastorequestanAnnualleaveoffourormoredaysinarow,s/hemustinformtheirsupervisorandtheHRdepartmentatleastfourweeksor20workingdaysahead.Thisis becauselongleavesdirectlyimpacttheefficiencyoftheprojectthatanemployeeisinvolvedin.'),
 Document(id='8a3877b9-8bc7-4a96-9182-4d4da6b08acc', metadata={'page': 1, 'page_label': '2', 'source': 'leave.pdf'}, page_content='FloatingFestiveleaves- 4days(Youcanchoosebetweenanyfestival/publicholidaysyoucelebrate.ThenumberofFloatingleavedependsonthenumberofDashainandTiharholidayssincetheleavesallocatedmaychangeaccording

## Conversational RAG (History Aware)

In [16]:
# System prompt for the answering step. `{context}` is a template placeholder
# (presumably filled with the retrieved documents by the stuff-documents
# chain built below -- confirm).
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

In [17]:
# Instruction for the question-rewriting step: turn a follow-up question into
# a standalone one using the chat history, without answering it.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

# Prompt layout: system instruction, prior messages ("chat_history"), then the
# latest user message ("input").
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)
# NOTE(review): the retriever wired in here is `ensemble_retriever`, so the
# reranking wrapper `cross_encoder_retriever` defined earlier is not part of
# this chain -- confirm that is intended.
history_aware_retriever = create_history_aware_retriever(
    model, ensemble_retriever, contextualize_q_prompt
)

In [18]:

# Answering prompt: system prompt (with its `{context}` placeholder), the
# chat history, then the latest user message.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

# Chain that answers from the supplied documents using `model` and `qa_prompt`.
question_answer_chain = create_stuff_documents_chain(model, qa_prompt)

# Full RAG chain: history-aware retrieval followed by question answering.
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

In [19]:
# In-memory registry of chat histories, keyed by session id.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    A new ``ChatMessageHistory`` is created and stored in ``store`` only when
    the session id has not been seen before.
    """
    try:
        return store[session_id]
    except KeyError:
        history = ChatMessageHistory()
        store[session_id] = history
        return history


# Wrap the RAG chain with per-session message history obtained from
# get_session_history. The three keys name the input field, the history
# field and the output field of the wrapped chain.
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

# Inference

In [20]:
# Assuming conversational_rag_chain is already defined and configured

def chatbot_conversation(session_id: str = "abc123"):
    """Run an interactive console chat against ``conversational_rag_chain``.

    Reads user input in a loop, sends each non-empty message to the chain and
    prints the answer. The loop ends when the user types "exit", "quit" or
    "bye" (case-insensitive, surrounding whitespace ignored), or when input is
    closed or interrupted.

    Args:
        session_id: Identifier of the chat session whose history is used.
            Defaults to "abc123".

    Returns:
        None.
    """
    exit_commands = ("exit", "quit", "bye")
    print("Chatbot: Hello! How can I assist you today?")

    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # stdin was closed or the cell was interrupted: end cleanly
            # instead of surfacing a traceback.
            print("Chatbot: Goodbye! Have a great day!")
            break

        # Nothing typed: ask again rather than sending an empty question.
        if not user_input:
            continue

        if user_input.lower() in exit_commands:
            print("Chatbot: Goodbye! Have a great day!")
            break

        # Invoke the conversational RAG chain with the user's input
        response = conversational_rag_chain.invoke(
            {"input": user_input},
            config={"configurable": {"session_id": session_id}},
        )["answer"]

        # Display the user's question and the chatbot's response
        print(f"You: {user_input}")
        print(f"Chatbot: {response}\n")

# Start the chatbot conversation
# (interactive: keeps prompting via input() until an exit command is typed)
chatbot_conversation()

Chatbot: Hello! How can I assist you today?
You: hello
Chatbot: Hello! How can I assist you today? Do you have a question about the leave policy at your workplace or would you like to ask something specific related to it?

You: what are the types of leave
Chatbot: Based on the provided context, here are the types of leave mentioned:

1. Annual Leave: up to 3 days in advance need to be applied for
2. Sick Leave: equivalent to 12 days
3. Compassionate Leave: maximum of 13 days (for loss of immediate family members)
4. Floating Festive Leaves: 4 days, can choose between any festival/public holiday celebrated
5. Maternity Leave: up to 98 days paid leave during the maternity period
6. Paternity Leave: up to 15 paid leaves during the spouse's maternity period

You: explain point number 4
Chatbot: Point number 4 mentions "Floating Festive Leaves". Here's a brief explanation:

* Number of Floating Leaves depends on the number of Dashain and Tihar holidays.
* The leaves are allocated according 

# Function Calling

In [21]:
# The docstring below is left unchanged on purpose: the @tool decorator is
# expected to expose it as the tool description, so it is runtime text.
# Both dates must be "YYYY-MM-DD" strings. Problems are reported by returning
# an error message string (not by raising), so the caller always gets a str.
@tool
def generate_leave_comment(reason: str, leave_type: str, start_date: str, end_date: str) -> str:
    """
    Generate a structured leave comment based on the provided reason, leave type,
    start and end dates.
    """

    try:
        # Validate and parse dates
        start = datetime.datetime.strptime(start_date, "%Y-%m-%d")
        end = datetime.datetime.strptime(end_date, "%Y-%m-%d")
    except ValueError:
        return "Invalid date format. Please use YYYY-MM-DD."

    # Reject reversed ranges; otherwise the day count below would be zero or
    # negative and the comment would be nonsensical.
    if end < start:
        return "Invalid date range. The end date must not be before the start date."

    # Calculate the number of leave days (inclusive of both start and end)
    days_requested = (end - start).days + 1

    # Generate the leave comment
    leave_comment = f"Leave Type: {leave_type.capitalize()}\n" \
                    f"Leave Dates: {start_date} to {end_date}\n" \
                    f"Reason: {reason}\n" \
                    f"Total Days: {days_requested}"

    print(leave_comment)

    return leave_comment

# Tools made available for function calling (passed to bind_tools below).
tools = [generate_leave_comment]

In [38]:
from langchain_experimental.llms.ollama_functions import OllamaFunctions

# Function-calling wrapper around the same "llama3.2" model, requesting JSON output.
# NOTE: this rebinds the name `model`, which earlier referred to the ChatOllama
# instance; cells re-run after this point will see the OllamaFunctions object.
model = OllamaFunctions(model="llama3.2", format="json")

In [39]:
# Attach the tool definitions to the function-calling model.
new_model = model.bind_tools(tools)

In [40]:
# Probe with a plain greeting and inspect which tool calls (if any) the model requested.
msg = new_model.invoke("hello")
msg.tool_calls

[]

In [41]:
# Probe with a natural-language leave request and inspect the resulting tool call(s).
msg = new_model.invoke("Can you create a leave comment for sick leave from dec 1 22 to dec 5 22?")
msg.tool_calls

[{'name': 'generate_leave_comment',
  'args': {'reason': 'Sick Leave',
   'leave_type': 'Sick Leave',
   'start_date': '2022-12-01',
   'end_date': '2022-12-05'},
  'id': 'call_bd725904cabd40fdb4451831d14ddf5f',
  'type': 'tool_call'}]

## Chat history in SQLite3

In [25]:
from langchain_community.chat_message_histories import SQLChatMessageHistory

def _sqlite_session_history(session_id):
    """Return the SQLite-backed message history for ``session_id``.

    Histories are stored in the "sqlite:///sqlite.db" database.
    """
    return SQLChatMessageHistory(
        session_id=session_id, connection_string="sqlite:///sqlite.db"
    )


# Rebuild the conversational chain so that chat history is persisted through
# SQLChatMessageHistory instead of the in-memory store used earlier.
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    _sqlite_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [26]:
# Define the session ID for the conversation
# ("session1" selects which stored history the chain reads and appends to).
config = {"configurable": {"session_id": "session1"}}

In [28]:
# Start the chatbot loop (history for this session is kept via `config`)
print("Chatbot: Hello! How can I assist you today? (Type 'exit' to end the conversation)")

while True:
    # Get user input; surrounding whitespace is ignored.
    try:
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input was closed or the cell was interrupted: end cleanly instead
        # of surfacing a traceback.
        print("Chatbot: Goodbye! Have a great day!")
        break

    # Nothing typed: prompt again rather than sending an empty question.
    if not user_input:
        continue

    # Exit the loop if the user types 'exit' or 'quit'
    if user_input.lower() in ["exit", "quit"]:
        print("Chatbot: Goodbye! Have a great day!")
        break

    # Invoke the conversational RAG chain with the user input
    response = conversational_rag_chain.invoke({"input": user_input}, config=config)

    # Extract and display the chatbot's response (with a fallback message
    # when the result carries no "answer" key)
    chatbot_response = response.get("answer", "I'm sorry, I couldn't generate a response.")
    print(f"Chatbot: {chatbot_response}")

Chatbot: Hello! How can I assist you today? (Type 'exit' to end the conversation)
Chatbot: Déjà vu! You're asking me again, and the answer remains the same: I don't know your last name, but you told me your first name is Sardul.
Chatbot: Goodbye! Have a great day!
