## Notebook Contains

A RAG (retrieval-augmented generation) agent built with `CrewAI`

## Importing Libraries

In [14]:
# Importing necessary libraries
import os
from crewai import LLM, Agent, Task, Crew
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter

from crewai.tools import BaseTool

In [15]:
# --- Configuration --- #
# Model identifier handed to crewai's LLM wrapper, and the embedding model
# name handed to HuggingFaceEmbeddings.
MODEL_NAME = "ollama/openhermes:latest"
EMBEDDING_MODEL = "all-MiniLM-L6-v2"

# Source text file and the directory the Chroma store is persisted to.
DATA_FILE = os.path.join("Data", "HR_Policy.txt")
VECTORSTORE_DIR = "hr_policy_vectorstore"

# Parameters forwarded to RecursiveCharacterTextSplitter.
CHUNK_SIZE, CHUNK_OVERLAP = 1000, 150


## Creating Vector Database

In [16]:
# Loading and Chunking the Data
def load_and_chunk_documents():
    """Read the HR policy text file and split it into chunks.

    Returns:
        The list of chunk documents produced by the text splitter, or
        ``None`` when ``DATA_FILE`` is not present on disk.
    """
    print("\t --- Step 1: Loading and Chunking Data ---")

    # Guard clause: without the source file there is nothing to split.
    if not os.path.exists(DATA_FILE):
        print(f"Data file {DATA_FILE} does not exist.")
        return None

    with open(DATA_FILE, 'r', encoding='utf-8') as policy_file:
        policy_text = policy_file.read()

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE,
        chunk_overlap=CHUNK_OVERLAP,
    )
    # create_documents takes a list of texts; the whole file is the single entry.
    chunks = splitter.create_documents([policy_text])

    print(f"Data loaded and split into {len(chunks)} chunks.")
    return chunks
    

In [17]:
# Creating and embedding the Vector Store
def create_and_store_embeddings(chunks):
    """Embed the chunks and persist them in a Chroma vector store.

    Args:
        chunks: Documents to embed, as returned by the text splitter. An
            empty or ``None`` value is reported and skipped.

    Returns:
        None. The store is written to ``VECTORSTORE_DIR`` as a side effect.
    """
    print("\t --- Step 2: Creating Vector Store ---")

    if not chunks:
        print("No chunks to create vector store.")
        return None

    # Initialize the embedding model from HuggingFace
    print(f"Loading embedding model: '{EMBEDDING_MODEL}'...")
    embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

    # Without explicit ids, every run of this cell appends a fresh copy of all
    # chunks to the persisted collection, and the retriever then returns
    # duplicate passages. Stable, position-based ids make re-ingestion
    # overwrite the existing entries instead.
    # NOTE: entries written by earlier runs with auto-generated ids, or
    # trailing chunks left from a previously longer document, are not removed;
    # delete VECTORSTORE_DIR for a clean rebuild.
    chunk_ids = [f"hr-policy-chunk-{index}" for index in range(len(chunks))]

    # Create the Chroma vector store and persist it to VECTORSTORE_DIR.
    print(f"Creating vector store in '{VECTORSTORE_DIR}'...")
    Chroma.from_documents(
        chunks,
        embeddings,
        ids=chunk_ids,
        persist_directory=VECTORSTORE_DIR,
    )
    print("Vector store created and persisted successfully.")

In [18]:
# Run ingestion. The HR policy text file must already exist at DATA_FILE;
# nothing in this notebook downloads it.
doc_chunks = load_and_chunk_documents()
if doc_chunks:
    create_and_store_embeddings(doc_chunks)
    print("\n\t--- Data Ingestion Complete ---")
    print(f"Vector store is ready in the '{VECTORSTORE_DIR}' directory.")
else:
    # Do not claim success when the file was missing or produced no chunks.
    print("\n\t--- Data Ingestion Skipped ---")
    print(f"No chunks were produced from '{DATA_FILE}'; the vector store was not updated.")

	 --- Step 1: Loading and Chunking Data ---
Data loaded and split into 25 chunks.
	 --- Step 2: Creating Vector Store ---
Loading embedding model: 'all-MiniLM-L6-v2'...
Creating vector store in 'hr_policy_vectorstore'...
Vector store created and persisted successfully.

	--- Data Ingestion Complete ---
Vector store is ready in the 'hr_policy_vectorstore' directory.


In [19]:
# --- 1. RAG Agent with Crew.ai --- #
# Build the embedding function and open the persisted Chroma store a single
# time; the retrieval tool defined below reads the resulting `retiver` global.
print("Loading embeddings and vector store for RAG tool...")
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
vectorstore = Chroma(embedding_function=embeddings, persist_directory=VECTORSTORE_DIR)

# Each lookup returns the 3 best-matching chunks.
retiver = vectorstore.as_retriever(search_kwargs=dict(k=3))

Loading embeddings and vector store for RAG tool...


## Defining Tools

In [20]:
# Defining retrieval tool
class RetrieveHRPolicy(BaseTool):
    """CrewAI tool that looks up HR policy passages in the vector store.

    Relies on the module-level ``retiver`` object created earlier in the
    notebook.
    """

    name: str = "RetrieveHRPolicy"
    # This text is what the agent's LLM reads to decide when to use the tool,
    # so it is kept free of typos.
    description: str = "Retrieves relevant HR policy information based on the query."

    def _run(self, query: str) -> str:
        """Return the stored policy passages most relevant to ``query``.

        Args:
            query: Natural-language question to search the vector store with.

        Returns:
            A string starting with ``"Retrieved context: "`` followed by the
            newline-joined passages, or an explanatory message when the query
            is blank or nothing was retrieved.
        """
        # A blank or non-text query cannot be searched meaningfully; answer
        # with a message the agent can act on instead of calling the retriever.
        if not isinstance(query, str) or not query.strip():
            return "No query provided. Please supply a question about HR policy."

        results = retiver.invoke(query)
        if not results:
            return "No relevant information found in the vector database."

        context = "\n".join(doc.page_content for doc in results)
        return f"Retrieved context: \n{context}"

In [21]:
# Quick manual check: call the tool directly (outside any agent) and display
# what it retrieves for a sample question.
retrival_tool = RetrieveHRPolicy()
sample_query = "What is the process for approval to request time off?"
retrival_tool._run(sample_query)

"Retrieved context: \nFAQ's\n\nFAQ: Human Resources\n\nQ1: How do I request time off and what is the process for approval?\n\nA1: To request time off, employees should follow the designated process outlined in the employee handbook. Typically, this involves submitting a request through an online system or directly to their immediate supervisor or the HR department. The request should include the desired dates and the reason for the time off. Approval is subject to manager discretion and business needs, and employees will receive a response confirming or denying their request.\n\nQ2: How can I update my personal information, such as address or emergency contacts?\n\nA2: To update personal information, employees should notify the HR department or use the designated employee self-service portal, if available. HR will provide the necessary forms or instructions to make the updates. It is essential to keep personal information current to ensure effective communication and for emergency situ

## Initializing LLM

In [22]:
# --- Configuring the LLM --- #
# Single LLM instance shared by the agent defined below.
llm = LLM(temperature=0.7, model=MODEL_NAME)


In [23]:
# Tool instance handed to the agent.
retrival_tool = RetrieveHRPolicy()

# Agent: answers questions by calling the retrieval tool.
agent = Agent(
    role="RAG Agent",
    goal=(
        "Answering user queries using the retrieval tool. "
        "If answer not found, return 'I don't know the answer.'"
    ),
    backstory=(
        "You are a helpful assistant. "
        "You can answer user queries from vector database created from HR policies."
    ),
    llm=llm,
    tools=[retrival_tool],
    verbose=False,  # set to True to trace the agent's steps
)

# Task: "{query}" stays a literal placeholder here; its value comes from the
# `inputs` dict passed to crew.kickoff().
task = Task(
    description=(
        "Following is a user Question. Answer this query from HR Document policies.\n"
        "    User Query: {query}"
    ),
    expected_output="Return answer in natural language.",
    agent=agent,
)

# Crew: one agent, one task.
crew = Crew(
    agents=[agent],
    tasks=[task],
    verbose=False,  # set to True for crew-level logging
)

In [24]:
# # Checking if Crew is ready
# print(crew.kickoff(inputs={"query": "What is the process for approval to request time off?"}))

In [25]:
# Defining Main Function
def main():
    """Run an interactive question/answer loop against the crew.

    Reads queries from standard input until the user types 'exit' (any
    casing, surrounding whitespace ignored), or until the input stream is
    closed or interrupted. Blank input is skipped without invoking the crew.
    """
    print("\n\t--- Starting Crew for RAG Agent ---")

    while True:
        try:
            user_query = input("Enter your query (or type 'exit' to quit): ").strip()
        except (EOFError, KeyboardInterrupt):
            # A closed stdin or an interrupt while waiting for input ends the
            # session cleanly instead of leaving a traceback.
            print("\nExiting the Crew. Goodbye!")
            break

        # Nothing to ask: prompt again rather than spending an LLM run on it.
        if not user_query:
            continue

        print(f"\n🙎‍♂️: {user_query}")
        if user_query.lower() == 'exit':
            print("Exiting the Crew. Goodbye!")
            break

        # Kickoff the Crew with the user query
        response = crew.kickoff(inputs={"query": user_query})
        print(f"🤖: {response}")

In [26]:
# Start the interactive Q&A loop; it keeps prompting until the user types 'exit'.
main()


	--- Starting Crew for RAG Agent ---

🙎‍♂️: How do I request time off and what is the process for approval?
🤖: To request time off, you should follow the designated process outlined in the employee handbook. Typically, this involves submitting a request through an online system or directly to your immediate supervisor or the HR department. The request should include the desired dates and the reason for the time off. Approval is subject to manager discretion and business needs, and you will receive a response confirming or denying your request.

🙎‍♂️: How can I update my personal information, such as address or emergency contacts?
🤖: To update your personal information, such as address or emergency contacts, you should notify the HR department or use the designated employee self-service portal, if available. HR will provide the necessary forms or instructions to make the updates. It is essential to keep personal information current to ensure effective communication and for emergency si