In [1]:
import os 
import requests

from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS 
from langchain.document_loaders import TextLoader
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.chains import RetrievalQA
from langchain_community.utilities import SQLDatabase
from langchain_openai import OpenAI
from langchain.agents import Tool
from langchain.agents import create_react_agent, AgentExecutor
from langchain.memory import ConversationBufferWindowMemory
from dotenv import load_dotenv

ModuleNotFoundError: No module named 'langchain_openai'

In [21]:
# Defining embedding model and directory to save the embeddings

model_name = "BAAI/bge-small-en-v1.5"
persistant_directory = os.getcwd() + '\\db\\'
documents = []
file_name = os.getcwd() + "/dataset/odyssey.txt"
embedding_model = "sentence-transformers/all-MiniLM-L6-v2"

# Reading the txt file 
def load_txt_file(file_path):  
    loader = TextLoader(file_path, encoding = 'UTF-8')
    book_docs = loader.load()
    return book_docs

# Splitting the txt file into chunks of 1000 character with 200 character_overlap
def split_doc_into_chunks(file_path):
    documents = load_txt_file(file_path)

    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    docs = text_splitter.split_documents(documents)
    return docs

def save_vector_store(docs, folder_path):
    # use when you are using OpenAI model
    # embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

    # using ChatGroq Llama model
    
    vector_store = FAISS.from_documents(docs, embedding_model)
    
    # Save the FAISS index to the specified folder
    os.makedirs(folder_path, exist_ok=True)
    vector_store.save_local(folder_path)
    print(f"Vector store saved to {folder_path}")

In [22]:
def save_embeddings_to_db(folder_path, model_name):

    faiss_index_path = os.getcwd() + "\db\index.faiss"
    faiss_pkl_path = os.getcwd() + "\db\index.pkl"

    if os.path.exists(faiss_index_path) and os.path.exists(faiss_pkl_path):
        return f"FAISS vector store already exists in {folder_path}. Skipping save."
    else:
        # Initialize Llama embeddings using Hugging Face
        embeddings = HuggingFaceEmbeddings(model_name=model_name)
        
        docs = split_doc_into_chunks(file_name)
        # Generate embeddings and create FAISS vector store
        vector_store = FAISS.from_documents(docs, embeddings)
        
        # Save the FAISS vector store locally in the db folder
        os.makedirs(folder_path, exist_ok=True)
        vector_store.save_local(folder_path)
        return f"FAISS vector store saved in {folder_path}. Skipping save."
    
save_embeddings_to_db(persistant_directory, model_name)

  embeddings = HuggingFaceEmbeddings(model_name=model_name)
  from tqdm.autonotebook import tqdm, trange
Created a chunk of size 1141, which is longer than the specified 1000
Created a chunk of size 2086, which is longer than the specified 1000
Created a chunk of size 1121, which is longer than the specified 1000
Created a chunk of size 1366, which is longer than the specified 1000
Created a chunk of size 1011, which is longer than the specified 1000
Created a chunk of size 1639, which is longer than the specified 1000
Created a chunk of size 1219, which is longer than the specified 1000
Created a chunk of size 1875, which is longer than the specified 1000
Created a chunk of size 1307, which is longer than the specified 1000
Created a chunk of size 2271, which is longer than the specified 1000
Created a chunk of size 1430, which is longer than the specified 1000
Created a chunk of size 1763, which is longer than the specified 1000
Created a chunk of size 1575, which is longer than the 

'FAISS vector store saved in c:\\Users\\ADMIN\\Tution\\Langchain_Leaern\\Project\\db\\. Skipping save.'

In [23]:
load_dotenv()

os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

In [24]:
from langchain_groq import ChatGroq

# llm = OpenAI()

llm = ChatGroq(model="Llama-3.3-70b-Versatile")
llm.invoke("who are you")

AIMessage(content='I\'m an artificial intelligence model known as Llama. Llama stands for "Large Language Model Meta AI."', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 23, 'prompt_tokens': 38, 'total_tokens': 61, 'completion_time': 0.109223244, 'prompt_time': 0.012459811, 'queue_time': 0.052747063, 'total_time': 0.121683055}, 'model_name': 'Llama-3.3-70b-Versatile', 'system_fingerprint': 'fp_c0cfa69934', 'finish_reason': 'stop', 'logprobs': None}, id='run-7702067e-73c2-4de9-8f87-9d843ea4cfbc-0', usage_metadata={'input_tokens': 38, 'output_tokens': 23, 'total_tokens': 61})

In [25]:
# Inilization of WeaperAPI class so when a request output come it format in proper json format 

def get_current_weather(city_name: str) -> str:
    """
    Get current weather for a city
    units: metric (Celsius) or imperial (Fahrenheit)
    """
    endpoint = f"http://api.openweathermap.org/data/2.5/weather"
    params = {
        "q": city_name,
        "appid": weather_api_key,
        "units": "metric"
    }
    
    try:
        response = requests.get(endpoint, params=params)
        response.raise_for_status()  # Raise exception for bad status codes
        data = response.json()
        
        return {
            "city": data["name"],
            "temperature": data["main"]["temp"],
            "feels_like": data["main"]["feels_like"],
            "humidity": data["main"]["humidity"],
            "description": data["weather"][0]["description"],
            "wind_speed": data["wind"]["speed"]
        }
        
    except requests.exceptions.RequestException as e:
        return f"Error fetching weather data: {e}"

In [26]:
# top_k=3
# query="who is auther mother"

# def retrieve_relevant_documents(query, folder_path, top_k, model_name):
#     """
#     Retrieves relevant documents from the FAISS vector store.

#     Args:
#         folder_path (str): Path to the folder where the vector store is saved.
#         query (str): The query to search for relevant documents.
#         top_k (int): Number of top relevant documents to retrieve.
#         model_name (str): The Hugging Face model used for embeddings.

#     Returns:
#         list: A list of the top-k relevant documents.
#     """
#     # Initialize Llama embeddings using Hugging Face
#     embeddings = HuggingFaceEmbeddings(model_name=model_name)
    
#     # Load the FAISS vector store
#     vector_store = FAISS.load_local(folder_path, embeddings, allow_dangerous_deserialization=True)
#     print(f"Vector store loaded from {folder_path}")
    
#     # Perform similarity search
#     results = vector_store.similarity_search(query, k=top_k)
#     return results

# retrieve_relevant_documents(query, persistant_directory, 2, model_name)

In [27]:
def create_rag_retrieval_chain():
    """
    Initializes the RAG Retrieval QA chain with the FAISS index from the 'db' folder.
    """
    # Load the FAISS index
    embedding = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")
    db = FAISS.load_local("db", embedding, allow_dangerous_deserialization=True)

    # Define the retriever
    retriever = db.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"k": 2, "score_threshold": 0.1}
    )

    # Define the prompt
    prompt_template = """Read the Question twice based on the provided context, respond to the question below while adhering to these guidelines:
    1. If the answer is not found, do not speculate. Instead, state, "I do not know the answer."
    2. If the answer is found, provide a clear and concise response in no more than ten sentences.

    {context}

    Question: {question}

    Answer:
    """
    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

    # Create the Retrieval QA chain
    retriever_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt}
    )

    return retriever_chain

weather_api_key = os.getenv("WEATHER_API_KEY")

def get_current_weather(city_name: str) -> str:
    """
    Get current weather for a city
    units: metric (Celsius) or imperial (Fahrenheit)
    """
    endpoint = f"http://api.openweathermap.org/data/2.5/weather"
    params = {
        "q": city_name,
        "appid": weather_api_key,
        "units": "metric"
    }
    
    try:
        response = requests.get(endpoint, params=params)
        response.raise_for_status()  # Raise exception for bad status codes
        data = response.json()
        
        return {
            "city": data["name"],
            "temperature": data["main"]["temp"],
            "feels_like": data["main"]["feels_like"],
            "humidity": data["main"]["humidity"],
            "description": data["weather"][0]["description"],
            "wind_speed": data["wind"]["speed"]
        }
        
    except requests.exceptions.RequestException as e:
        return f"Error fetching weather data: {e}"

def get_schame(_):    
    db_url = "mysql+mysqlconnector://root:1234@localhost:3306/employee_management"
    db = SQLDatabase.from_uri(db_url)
    return db.get_table_info()

def run_query(query):
    db_url = "mysql+mysqlconnector://root:1234@localhost:3306/employee_management"
    db = SQLDatabase.from_uri(db_url)
    return db.run(query)

def get_table_information(table_query: str) -> str:
    """
    Generates a natural language response to questions about database tables by:
    1. Converting the question to SQL
    2. Executing the SQL query
    3. Converting the SQL results to natural language
    
    Args:
        table_query (str): The user's question about the database
        
    Returns:
        str: Natural language response based on the SQL query results
    """
    
    template = """
    Based on the table schema below, write a valid SQL query that answers the user's question:
    NOTE: Ensure the query is syntactically correct and does not include markdown or extraneous text.
    {schema}

    Question: {question}
    SQL Query:
    """

    prompt = ChatPromptTemplate.from_template(template)

    sql_chain = (
        RunnablePassthrough.assign(schema=get_schame)
        | prompt
        | llm
        | StrOutputParser()
    )

    template = """
    Based on the table schema, user's question, SQL query, and query results, write a concise and accurate natural language response:
    {schema}

    Question: {question}
    SQL Query: {query}
    SQL Response: {response}
    Response:
    """

    prompt = ChatPromptTemplate.from_template(template)

    full_chain = (
        RunnablePassthrough.assign(query=sql_chain).assign(
            schema=get_schame,
            response= lambda x: run_query(x["query"])
        )
        | prompt
        | llm
    )

    return full_chain.invoke({"question": table_query})

In [30]:
def run_query(query): 
    # Connect to the database
    connection = mysql.connector.connect(
        user="root",
        password="1234",
        host="127.0.0.1",
        port="3306",  # Default MySQL port
        database="telecom_db"
    )

    cursor = connection.cursor()

    try:
        # Execute the query
        cursor.execute(query)
        # Fetch all rows from the query result
        result = cursor.fetchall()
        return result
    except Exception as e:
        # Handle errors and return the error message
        return f"An error occurred: {str(e)}"
    finally:
        # Close the connection
        connection.close()

query = "SELECT * FROM call_data_records"
output = run_query(query)
print(output)

[(1, 1, datetime.datetime(2024, 4, 16, 20, 58, 6), datetime.datetime(2024, 5, 4, 21, 2, 59), '595-898-3808x963', '001-700-825-0718x38604', 2487, Decimal('44.99')), (2, 48, datetime.datetime(2024, 7, 4, 21, 45, 22), datetime.datetime(2024, 8, 19, 15, 7, 28), '(706)024-1538x3012', '769.200.5537', 1411, Decimal('17.64')), (3, 29, datetime.datetime(2024, 9, 30, 22, 28, 51), datetime.datetime(2024, 6, 20, 5, 58, 48), '996-813-4197x7204', '001-719-167-5836x240', 1565, Decimal('46.55')), (4, 50, datetime.datetime(2024, 8, 28, 18, 45), datetime.datetime(2024, 6, 4, 19, 9, 59), '+1-889-620-8735x1985', '710-760-3174x34148', 2620, Decimal('2.21')), (5, 37, datetime.datetime(2024, 7, 12, 17, 21, 12), datetime.datetime(2024, 1, 28, 6, 29, 54), '715-593-0574x0122', '206-484-7237x7914', 2377, Decimal('32.15')), (6, 20, datetime.datetime(2024, 4, 25, 1, 7, 8), datetime.datetime(2024, 2, 5, 21, 6, 44), '709-028-4766x774', '9389653135', 2151, Decimal('14.83')), (7, 7, datetime.datetime(2024, 6, 26, 11, 

In [28]:
def run_query(query):
    db_url = "mysql+mysqlconnector://root:1234@localhost:3306/telecom_db"
    db = SQLDatabase.from_uri(db_url)
    return db.run(query)

run_query("SELECT * FROM customer_info")

"[(1, 'Natalie', 'Pierce', 'anthonyhartman@obrien.com', '+1-804-457-5152', datetime.date(2003, 10, 29), '8618 Amy Rapids Port Christian, NM 37068'), (2, 'Heather', 'Collins', 'jo31@hotmail.com', '001-452-918-0558x376', datetime.date(2015, 2, 5), '866 Cordova Canyon Simpsonberg, MA 77319'), (3, 'Jacqueline', 'Brown', 'christopher56@mccarty.com', '001-065-718-2695x66715', datetime.date(1947, 4, 7), '245 Kirk Mountain Suite 538 Kelleytown, NH 27835'), (4, 'Leslie', 'Rodriguez', 'williamsbrandon@russell.net', '(529)676-8492x11758', datetime.date(1921, 5, 2), '429 Robert Canyon Port Josephland, PA 14901'), (5, 'Bailey', 'Odom', 'latoya96@gmail.com', '001-635-938-2553x052', datetime.date(2004, 5, 3), '6075 Glenn Meadow South Sarah, MD 93195'), (6, 'Amanda', 'Decker', 'kmoyer@yahoo.com', '7906465435', datetime.date(2012, 1, 2), '4947 Ryan Village South Benjamin, SC 76206'), (7, 'Matthew', 'Scott', 'ramseyrobert@gmail.com', '367.201.7207', datetime.date(2002, 12, 26), '606 Howell Junctions Apt

In [29]:
# Defining all the tools
tools = [
    # Tool(
    #     name="rag_answer",
    #     func=create_rag_retrieval_chain,
    #     description="""Use this tool to answer questions about the loaded storybook.
    #     Input should be a question about the story's content, characters, plot, or themes.
    #     The tool uses RAG (Retrieval Augmented Generation) to provide accurate answers based on the story text.
    #     Use this when questions are about the story's content or when you need to find specific information from the story."""
    # ),
    Tool(
        name="rag_answer",
        func=lambda query: create_rag_retrieval_chain().invoke(query),
        description="""Use this tool to answer questions about the loaded storybook.
        Input should be a question about the story's content, characters, plot, or themes.
        This tool uses Retrieval Augmented Generation (RAG) to provide accurate answers."""
    ),
    Tool(
        name="current_weather",
        func=get_current_weather,
        description="Use this tool to get current weather information for any city. Input should be a city name."
    ),
    Tool(
        name="query_database",
        func=get_table_information,
        description="""Use this tool to query the employee management database and get information in natural language.
        The tool can handle questions about:
        - Employee details (name, salary, position, department, hire date)
        - Department information (name, location, budget, manager)
        - Statistical queries (averages, counts, grouping)
        
        Example questions:
        - "How many employees are in the Engineering department?"
        - "What is the average salary by department?"
        - "Who are the most recently hired employees?"
        - "List all departments and their managers"
        - "Show me employees with salaries above 70000"
        
        Input should be a natural language question about employee or department data.
        The tool will convert your question to SQL, execute it, and return a human-readable response."""
    )
]

# Creating React PromptTemplate
template = '''Answer the following questions as best you can. You have access to the following tools:
{tools}

Use the following format:
Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
(this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

BEGIN!
Question: {input}
Thought:{agent_scratchpad}'''

prompt = PromptTemplate.from_template(template)

# Initialization of memory to save all the past conversations
memory = ConversationBufferWindowMemory(memory_key="history", k=3)

# Create the agent and executor
agent = create_react_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, memory=memory, verbose=True)

In [30]:
# Defining the run_agent function
def run_agent(query):
    try:
        # Invoke the agent executor
        agent_ans = agent_executor.invoke({"input": query})
    except Exception as e:
        print(f"Error occurred: {e}")
        return "Sorry, I could not process your query."

    return agent_ans

In [31]:
ans = run_agent("total number of department")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mTo find the total number of departments, I should query the employee management database.

Action: query_database
Action Input: How many departments are there?[0m[38;5;200m[1;3mcontent='There are 3 departments in the company: Engineering, Marketing, and Human Resources.' additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 18, 'prompt_tokens': 502, 'total_tokens': 520, 'completion_time': 0.065454545, 'prompt_time': 0.07428232, 'queue_time': 0.019536975999999998, 'total_time': 0.139736865}, 'model_name': 'Llama-3.3-70b-Versatile', 'system_fingerprint': 'fp_fcc3b74982', 'finish_reason': 'stop', 'logprobs': None} id='run-8ef28fc1-927e-47c7-9fa0-df8c788c0cfc-0' usage_metadata={'input_tokens': 502, 'output_tokens': 18, 'total_tokens': 520}[0m[32;1m[1;3mI now know the final answer
Final Answer: There are 3 departments in the company.[0m

[1m> Finished chain.[0m


In [49]:
run_agent("i am currently in dallas should i carry an umbrella")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: To determine if I should carry an umbrella in Dallas, I need to check the current weather in Dallas. 

Action: current_weather
Action Input: Dallas[0m[33;1m[1;3m{'city': 'Dallas', 'temperature': 13.86, 'feels_like': 13.02, 'humidity': 66, 'description': 'clear sky', 'wind_speed': 3.6}[0m[32;1m[1;3mSince the weather description in Dallas is 'clear sky', it's unlikely to rain. Therefore, I don't need to carry an umbrella.

Thought: I now know the final answer
Final Answer: No, you don't need to carry an umbrella. The current weather in Dallas is clear sky.[0m

[1m> Finished chain.[0m


{'input': 'i am currently in dallas should i carry an umbrella',
 'history': '',
 'output': "No, you don't need to carry an umbrella. The current weather in Dallas is clear sky."}

In [40]:
run_agent("hero of the book is from which country")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mTo determine the country of origin of the hero in the book, I should use the tool that provides information about the story's content, which is the rag_answer tool.

Action: rag_answer
Action Input: What country is the hero of the book from?[0m[36;1m[1;3m{'query': 'What country is the hero of the book from?', 'result': 'The hero of the book, Ulysses, is from the country of the Achaeans. He mentions that he and his people are on their way home from Troy and that they are the people of Agamemnon, son of Atreus. Specifically, Ulysses is from the region of Achaea, which is in ancient Greece.', 'source_documents': [Document(metadata={'source': 'c:\\Users\\ADMIN\\Tution\\Langchain_Leaern\\Project/dataset/odyssey.txt'}, page_content='“‘Strangers, who are you? Where do sail from? Are you traders, or do\nyou sail the sea as rovers, with your hands against every man, and\nevery man’s hand against you?’\n\n“We were frightened out of 

{'input': 'hero of the book is from which country',
 'history': "Human: name of all the department and their budeget\nAI: The names and budgets of all departments are: \n- Engineering with a budget of $1,500,000.00\n- Marketing with a budget of $800,000.00\n- Human Resources with a budget of $500,000.00\nHuman: i am currently in dallas should i carry an umbrella\nAI: Based on the current weather conditions in Dallas, which describe the weather as 'smoke' with a humidity of 71%, but do not mention rain, it is unlikely that you need to carry an umbrella for rain. However, the high humidity and smoke description might suggest fog or other conditions that could potentially require an umbrella. To be safe, you might consider carrying a light umbrella or checking more detailed forecasts for precipitation chances.",
 'output': 'The hero of the book, Ulysses, is from the region of Achaea, which is in ancient Greece.'}

In [43]:
for message in memory.chat_memory.messages:
    if message.type == "human":
        print(f"Human: {message.content}")
    elif message.type == "ai":
        print(f"AI: {message.content}")

Human: name of all the department and their budeget
AI: The names and budgets of all departments are: 
- Engineering with a budget of $1,500,000.00
- Marketing with a budget of $800,000.00
- Human Resources with a budget of $500,000.00
Human: i am currently in dallas should i carry an umbrella
AI: Based on the current weather conditions in Dallas, which describe the weather as 'smoke' with a humidity of 71%, but do not mention rain, it is unlikely that you need to carry an umbrella for rain. However, the high humidity and smoke description might suggest fog or other conditions that could potentially require an umbrella. To be safe, you might consider carrying a light umbrella or checking more detailed forecasts for precipitation chances.
Human: hero of the book is from which country
AI: The hero of the book, Ulysses, is from the region of Achaea, which is in ancient Greece.
