In [1]:
import os 
import requests

from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS 
from langchain.document_loaders import TextLoader
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.chains import RetrievalQA
from langchain_community.utilities import SQLDatabase
from langchain_openai import OpenAI
from langchain.agents import Tool
from langchain.agents import create_react_agent, AgentExecutor
from langchain.memory import ConversationBufferMemory

In [2]:
# Defining embedding model and directory to save the embeddings

model_name = "BAAI/bge-small-en-v1.5"

persistant_directory = os.getcwd() + '\\db\\'

In [None]:
# Reading the txt file 

documents = []

file_path = os.getcwd() + "\dataset\odyssey.txt"
loader = TextLoader(file_path, encoding = 'UTF-8')
book_docs = loader.load()

book_docs



In [4]:
# Splitting the txt file into chunks of 250 character with 50 character_overlap

text_splitter = CharacterTextSplitter(chunk_size=250, chunk_overlap=50)
docs = text_splitter.split_documents(book_docs)

Created a chunk of size 491, which is longer than the specified 250
Created a chunk of size 337, which is longer than the specified 250
Created a chunk of size 296, which is longer than the specified 250
Created a chunk of size 428, which is longer than the specified 250
Created a chunk of size 1141, which is longer than the specified 250
Created a chunk of size 539, which is longer than the specified 250
Created a chunk of size 681, which is longer than the specified 250
Created a chunk of size 648, which is longer than the specified 250
Created a chunk of size 2086, which is longer than the specified 250
Created a chunk of size 1121, which is longer than the specified 250
Created a chunk of size 914, which is longer than the specified 250
Created a chunk of size 1366, which is longer than the specified 250
Created a chunk of size 667, which is longer than the specified 250
Created a chunk of size 304, which is longer than the specified 250
Created a chunk of size 569, which is longer

In [7]:
# Loading Open_Ai model

llm = OpenAI(api_key="openai_api_key")

In [None]:
# Inilization of WeaperAPI class so when a request output come it format in proper json format 

# Initialize the API with your key
api_key = "open_weather_api_key"

class WeatherAPI:
    def __init__(self, api_key):
        self.api_key = api_key
        self.base_url = "http://api.openweathermap.org/data/2.5/"

    def get_current_weather(self, city_name, units="metric"):
        """
        Get current weather for a city
        units: metric (Celsius) or imperial (Fahrenheit)
        """
        endpoint = f"{self.base_url}weather"
        params = {
            "q": city_name,
            "appid": self.api_key,
            "units": units
        }
        
        try:
            response = requests.get(endpoint, params=params)
            response.raise_for_status()  # Raise exception for bad status codes
            data = response.json()
            
            return {
                "city": data["name"],
                "temperature": data["main"]["temp"],
                "feels_like": data["main"]["feels_like"],
                "humidity": data["main"]["humidity"],
                "description": data["weather"][0]["description"],
                "wind_speed": data["wind"]["speed"]
            }
            
        except requests.exceptions.RequestException as e:
            return f"Error fetching weather data: {e}"
        
weather_api = WeatherAPI(api_key)

# Get current weather
current_weather = weather_api.get_current_weather("London")
print("Current Weather:", current_weather)

Current Weather: {'city': 'London', 'temperature': 4.94, 'feels_like': 0.19, 'humidity': 78, 'description': 'few clouds', 'wind_speed': 7.72}


In [22]:
def create_rag_retrieval_chain(query: str) -> str:
    """
    Creates a RAG (Retrieval-Augmented Generation) chain for question answering using document retrieval.
    
    Args:
        query (str): The user's question or query text
        
    Returns:
        str: The generated answer based on retrieved relevant documents
    """

    embedding = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")

    db = FAISS.from_documents(docs, embedding)

    retriever = db.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k":2, "score_threshold": 0.1})

    prompt_template = """Read the Question twice based on the provided context, respond to the question below while adhering to these guidelines:
    1. If the answer is not found, do not speculate. Instead, state, "I do not know the answer"
    2. If the answer is found, provide a clear and concise response in no more than ten sentences.

    {context}

    Question: {question}

    Answer:
    """

    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

    retriver_ans = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt}
    )

    return retriver_ans.invoke(query)['result']

def get_current_weather(city_name: str) -> str:
    return weather_api.get_current_weather(city_name)

def get_schame(_):    
    db_url = "mysql+mysqlconnector://root:1234@localhost:3306/employee_management"
    db = SQLDatabase.from_uri(db_url)
    return db.get_table_info()

def run_query(query):
    db_url = "mysql+mysqlconnector://root:1234@localhost:3306/employee_management"
    db = SQLDatabase.from_uri(db_url)
    return db.run(query)

def get_table_information(table_query: str) -> str:
    """
    Generates a natural language response to questions about database tables by:
    1. Converting the question to SQL
    2. Executing the SQL query
    3. Converting the SQL results to natural language
    
    Args:
        table_query (str): The user's question about the database
        
    Returns:
        str: Natural language response based on the SQL query results
    """
    
    template = """
    Based on the table schema below, write SQL query that would answer the user's question:
    NOTE: Make sure you only return SQL query nothing else
    {schema}

    Question: {question}
    SQL Query:
    """

    prompt = ChatPromptTemplate.from_template(template)

    sql_chain = (
        RunnablePassthrough.assign(schema=get_schame)
        | prompt
        | llm
        | StrOutputParser()
    )

    template = """
    Bases on the table schema below, question, sql query, and sql response, write natural language response
    NOTE: just write you response
    {schema}

    Question: {question}
    SQL Query: {query}
    SQL Response: {response}"""

    prompt = ChatPromptTemplate.from_template(template)

    full_chain = (
        RunnablePassthrough.assign(query=sql_chain).assign(
            schema=get_schame,
            response= lambda x: run_query(x["query"])
        )
        | prompt
        | llm
    )

    return full_chain.invoke({"question": table_query})

In [23]:
# Defining all the tools

tools = [
    Tool(
        name="rag_answer",
        func=create_rag_retrieval_chain,
        description="""Use this tool to answer questions about the loaded storybook.
        Input should be a question about the story's content, characters, plot, or themes.
        The tool uses RAG (Retrieval Augmented Generation) to provide accurate answers based on the story text.
        Use this when questions are about the story's content or when you need to find specific information from the story."""
        ),
    Tool(
        name="current_weather",
        func=get_current_weather,
        description="Use this tool to get current weather information for any city. Input should be a city name."
    ),
    Tool(
        name="query_database",
        func=get_table_information,
        description="""Use this tool to query the employee management database and get information in natural language.
        The tool can handle questions about:
        - Employee details (name, salary, position, department, hire date)
        - Department information (name, location, budget, manager)
        - Statistical queries (averages, counts, grouping)
        
        Example questions:
        - "How many employees are in the Engineering department?"
        - "What is the average salary by department?"
        - "Who are the most recently hired employees?"
        - "List all departments and their managers"
        - "Show me employees with salaries above 70000"
        
        Input should be a natural language question about employee or department data.
        The tool will convert your question to SQL, execute it, and return a human-readable response."""
        )
]

In [24]:
# Crating React PromptTemplate  

template = '''Answer the following questions as best you can. You have access to the following tools:
{tools}

Use the following format:
Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
(this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

BEGIN!
Question: {input}
Thought:{agent_scratchpad}'''

prompt = PromptTemplate.from_template(template)

In [None]:
# Inilization of memory to save all the past Conversation

memory = ConversationBufferMemory(memory_key="history", k=3)

In [26]:
# Creating the agent_executor which execute the agent on call

agent = create_react_agent(llm, tools, prompt)

agent_executor = AgentExecutor(agent=agent, tools=tools, memory=memory  ,verbose=True)

In [27]:
# Inferance testing 1

agent_executor.invoke({"input": "Average salary of all department"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m We can use the query_database tool as it can handle statistical queries such as averages.
Action: query_database
Action Input: "What is the average salary by department?"[0m[38;5;200m[1;3m 
    
The average salary for each department is: Engineering - $76,588.24, Marketing - $61,666.67, Human Resources - $57,857.14, Finance - $68,625.00, and Sales - $66,333.33.[0m[32;1m[1;3m Since the question is asking for the average salary of all departments, we can further refine the query to only include the average salary across all departments.
Action: query_database
Action Input: "What is the average salary by department?"[0m[38;5;200m[1;3m 

The average salary for each department is as follows: 
- Engineering: $76,588.24 
- Marketing: $61,666.67 
- Human Resources: $57,857.14 
- Finance: $68,625.00 
- Sales: $66,333.33[0m[32;1m[1;3m Now we have more specific information, but we still need to find the average of all these

{'input': 'Average salary of all department',
 'history': '',
 'output': 'The average salary of all departments is $66,174.88.'}

In [None]:
# Inferance testing 2

agent_executor.invoke({"input": "What was the name of story hero mother"})



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I need to find the name of the hero's mother
Action: rag_answer
Action Input: What was the name of story hero mother?[0m[36;1m[1;3mThe name of the hero's mother was Anticlea, daughter of Autolycus. She appeared as a ghost when the hero, Telemachus, was communicating with the ghost of his comrade. Telemachus had left his mother alive when he set out for Troy and was moved to tears when he saw her. He held his sword over the blood and refused to let his mother come near until he had asked his questions of Teiresias. Anticlea's presence and conversation with Telemachus shows the strong bond between them and the depth of the hero's emotions towards his mother. [0m[32;1m[1;3m Anticlea, got it!
Final Answer: Anticlea[0m

[1m> Finished chain.[0m


{'input': 'What was the name of story hero mother',
 'history': 'Human: Average salary of all department\nAI: The average salary of all departments is $66,174.88.',
 'output': 'Anticlea'}

In [30]:
# printing all the past conversation

print(memory.load_memory_variables({}))

{'history': 'Human: Average salary of all department\nAI: The average salary of all departments is $66,174.88.\nHuman: What was the name of story hero mother\nAI: Anticlea'}
