In [3]:
from langchain.utilities import SQLDatabase  # Connect to the mysql database
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEmbeddings
load_dotenv()
from langchain_chroma import Chroma
from langchain_core.example_selectors import SemanticSimilarityExampleSelector
from langchain_core.messages import HumanMessage,AnyMessage,AIMessage,ToolMessage,SystemMessage
from langchain.tools import tool
from langchain.agents import AgentExecutor , create_openai_tools_agent
from langchain.prompts import ChatPromptTemplate,HumanMessagePromptTemplate,MessagesPlaceholder
from langchain_core.runnables import RunnableLambda,RunnableParallel,RunnablePassthrough,RunnableSequence
from langchain_core.messages import trim_messages
from langchain_groq import ChatGroq


# 1. Connect to the MySQL database

In [5]:
import os
from urllib.parse import quote_plus
from langchain_community.utilities import SQLDatabase

# Credentials are read from the environment (for example from the .env file
# loaded with load_dotenv()) so that no secret is stored in the notebook.
# Expected variables: MYSQL_USER, MYSQL_PASSWORD, MYSQL_HOST, MYSQL_DATABASE.
username = os.getenv("MYSQL_USER", "root")
password = os.getenv("MYSQL_PASSWORD")
host = os.getenv("MYSQL_HOST", "127.0.0.1")
db_name = os.getenv("MYSQL_DATABASE", "nepa_wholesale")

if password is None:
    raise RuntimeError(
        "MYSQL_PASSWORD is not set. Define it in the environment or in the .env file."
    )

# URL encode the password to handle special characters (e.g. '@' or '#')
encoded_password = quote_plus(password)

# Create the MySQL URI with encoded password
mysql_uri = f"mysql+pymysql://{username}:{encoded_password}@{host}/{db_name}"

# Only a masked form of the URI is printed: the real one embeds the password
# and would otherwise be saved in the notebook output.
print(f"Encoded URI: mysql+pymysql://{username}:***@{host}/{db_name}")

# Create database connection
try:
    db = SQLDatabase.from_uri(mysql_uri, sample_rows_in_table_info=2)
    print("Database connection successful!")

    # Test the connection
    tables = db.get_usable_table_names()
    print(f"Available tables: {tables}")

except Exception as e:
    print(f"Connection failed: {e}")
    # Re-raise: every later cell needs `db`, so continuing would only
    # replace this error with a confusing NameError further down.
    raise

Encoded URI: mysql+pymysql://root:***@127.0.0.1/nepa_wholesale
Database connection successful!
Available tables: ['cigars_category', 'disposable_category', 'tobaccos_category']


In [6]:
# Quick sanity check of the connection: the SQL dialect, then the usable table names.
print(db.dialect, db.get_usable_table_names(), sep="\n")
# Uncomment to also print the schema information of the tables:
# print(db.get_table_info())

mysql
['cigars_category', 'disposable_category', 'tobaccos_category']


# 2. Initialize the LLM and the embedding model

In [10]:
from dotenv import load_dotenv

# Load the variables defined in .env before the chat model is created.
load_dotenv()

# Chat model that drives the agent. No API key is passed explicitly here.
# NOTE(review): presumably ChatOpenAI picks the key up from the environment
# populated by load_dotenv() above - confirm.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.3, max_tokens=300)

In [None]:
# llm.invoke("Hi")

In [11]:
# Embedding model that is handed to the Chroma vector store below.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


# 3. Creating few-shot examples

In [12]:
# Few-shot examples: each entry pairs a natural-language question with the SQL
# query that answers it. The question is what gets embedded for similarity
# search; the whole pair is shown to the model as a worked example.
#
# Nicotine strength is compared numerically after stripping the '%' sign.
# A pattern such as LIKE '%2%%' consists only of wildcards around "2" and
# therefore also matches values like 12%, 20% or 2.5%. DECIMAL(5,2) is used
# because DECIMAL(3,2) cannot represent values of 10 or more.
examples = [
    {
        'question': "How many products of fronto are there in tobaccos ?",
        'query': "SELECT COUNT(*) FROM tobaccos_category WHERE Brand LIKE '%FRONTO%';"
    },
    {
        'question': "How many products are there in tobaccos with flavor mint",
        'query': "SELECT COUNT(Product_ID) FROM tobaccos_category WHERE Flavor LIKE '%mint%';"
    },
    {
        'question': "How many products are there in disposable having nicotine less than 5%. list them ?",
        'query': "SELECT Product_ID, Display_Name, Nicotine_strength FROM disposable_category WHERE CAST(REPLACE(Nicotine_strength, '%', '') AS DECIMAL(5,2)) < 5 LIMIT 5;"
    },
    {
        'question': "Which flavors of cigars have 'Raspberry' in them?",
        'query': "SELECT Flavor FROM cigars_category WHERE Flavor LIKE '%Raspberry%' LIMIT 5;"
    },
    {
        'question': "List disposable products with nicotine strength of 2%",
        'query': "SELECT Display_Name, Nicotine_strength FROM disposable_category WHERE CAST(REPLACE(Nicotine_strength, '%', '') AS DECIMAL(5,2)) = 2 LIMIT 5;"
    },
    {
        'question': "I’m looking for cigar products from AL CAPONE. What do you have?",
        'query': "SELECT Display_Name, Flavor FROM cigars_category WHERE Brand LIKE '%AL CAPONE%' LIMIT 5;"
    },
    {
        'question': "How many unique flavors are offered in disposable products with 5000 puffs?",
        'query': "SELECT COUNT(DISTINCT Flavor) FROM disposable_category WHERE Puff_count = 5000;"
    },
    {
        'question': "I’m looking for cigars that come in a 12PK. What options do you have?",
        'query': "SELECT Display_Name, Brand, Flavor FROM cigars_category WHERE Packet_count LIKE '%12PK%' LIMIT 5;"
    },
    {
        'question': "Get the top 5 disposable products with the highest puff count.",
        'query': "SELECT Display_Name, Puff_count FROM disposable_category ORDER BY Puff_count DESC LIMIT 5;"
    },
    {
        'question': "Which brands offer disposable products with puff count equal to 5000?",
        'query': "SELECT DISTINCT Brand FROM disposable_category WHERE Puff_count = 5000 LIMIT 5;"
    },
    {
        'question': "How many products are there in DEATH ROW DISPOSABLE sub-category with 2% nicotine and 5 pack count?",
        'query': "SELECT COUNT(*) FROM disposable_category WHERE Product_Sub_Category LIKE '%DEATH ROW DISPOSABLE%' AND CAST(REPLACE(Nicotine_strength, '%', '') AS DECIMAL(5,2)) = 2 AND Pack_count = 5;"
    },
    {
        'question': "How many different cigar flavors do you offer?",
        'query': "SELECT COUNT(DISTINCT Flavor) FROM cigars_category;"
    }
]


# 4. Creating the vectorstore

In [None]:
# Chroma vector store that will hold the few-shot examples.
vectorstore = Chroma(
    collection_name="example_collection",  # name of the collection inside the store
    persist_directory="./chroma_db",       # directory given to Chroma for persistence
    embedding_function=embedding_model,    # model used to embed the stored texts
)

In [None]:
import hashlib

# Adding the examples to the vector store: only the question is stored as the
# text, while the entire question/query pair goes into the metadata.
#
# Each example gets a deterministic id derived from its question. Because the
# collection is persisted in ./chroma_db, adding without ids would append a
# fresh copy of every example each time this cell is re-run.
# NOTE(review): copies already stored by earlier runs (with auto-generated
# ids) are not removed by this change - clear the collection once if needed.
example_ids = [
    hashlib.sha256(ex['question'].encode('utf-8')).hexdigest() for ex in examples
]
vectorstore.add_texts(
    [ex['question'] for ex in examples],
    metadatas=examples,
    ids=example_ids,
)

# 5. Defining the examples selector from vectorstore

In [None]:
# Selector that looks up the stored example most similar to an incoming question.
example_selector = SemanticSimilarityExampleSelector(
    vectorstore=vectorstore,
    input_keys=['question'],             # input field used for the similarity search
    example_keys=['question', 'query'],  # fields returned for each selected example
    k=1,                                 # number of examples to select
)

# 6. Build the dynamic prompt: user question + retrieved examples

In [None]:
def build_dynamic_prefix(user_question: str) -> str:
    """Build the human message text for a question.

    The examples returned by ``example_selector`` for ``user_question`` are
    rendered as ``Human: ... / AI: / SQLQuery: ...`` blocks and placed after
    the question itself.

    Args:
        user_question: The question typed by the user.

    Returns:
        The text containing the question followed by the selected examples.
    """
    nearest_examples = example_selector.select_examples({"question": user_question})

    rendered_examples = []
    for shot in nearest_examples:
        rendered_examples.append(
            f"Human: {shot['question']}\nAI:\nSQLQuery: {shot['query']}"
        )
    examples_text = "\n".join(rendered_examples)

    # Assembled piece by piece so the blank lines and trailing spaces of the
    # prompt are explicit.
    return (
        f"User Question to answer :  {user_question}\n"
        "\n"
        "Refer to the below given most similar examples to answer the above user question . \n"
        "\n"
        "Examples:\n"
        f"{examples_text}\n"
        "...\n"
        "\n"
        "Now begin.\n"
    )
# dynamic_prefix = build_dynamic_prefix("what is my income of last year")

# 7. Building a custom SQL agent with `create_openai_tools_agent` and custom tools

### Note: Only works with OpenAI models

In [None]:

# tool to get the list of tables from database
@tool
def list_tables(_: str = "") -> str:
    """Use this tool to list all table names that exist in the connected MySQL database. Pass an empty string to get the list of available tables
    This is helpful when you need to know what tables are available before writing a SQL query."""
    # The argument is ignored; it only exists so the tool can be called with
    # an (empty) string input.
    # get_usable_table_names() replaces the deprecated get_table_names() and is
    # the same accessor used for the connection check earlier in the notebook.
    return str(db.get_usable_table_names())


# tool to get schema of table to be used
@tool
def describe_table(table_name: str) -> str:
    """Use this tool to get the schema (column names and types) of a specific table.Input should be the name of the table as a string. 
    This is useful to understand what data is stored in the table before writing queries."""
    # The model may wrap the name in whitespace, quotes or backticks.
    cleaned_name = table_name.strip().strip("`'\"")
    # The *_no_throw variant returns the error text instead of raising, so an
    # unknown table name is reported back to the agent (which can then call
    # list_tables and retry) rather than aborting the whole run.
    return str(db.get_table_info_no_throw([cleaned_name]))

# tool to execute the sql query in database
@tool
def run_sql_query(query: str) -> str:
    """Use this tool to run a raw SQL query on the database and return the result.
    Input should be a complete and valid SQL SELECT query as a string. 
    Use this when you already know which table and columns to query."""
    import re

    # The SQL is written by the model from user input, so enforce the
    # "SELECT only" contract stated in the description before touching the
    # database. Leading whitespace and '(' are allowed for parenthesised selects.
    # NOTE(review): this is a first line of defence only; the connection should
    # also use a database account restricted to read-only privileges.
    if not re.match(r"[\s(]*SELECT\b", query, flags=re.IGNORECASE):
        return "Error: only SELECT queries can be executed with this tool."

    # run_no_throw returns the database error as text instead of raising, so
    # the agent can read the message and correct its query.
    return str(db.run_no_throw(query))





In [None]:
# Inspect how the decorated function is exposed as a tool.
print(
    run_sql_query.name,                             # name of the tool
    run_sql_query.description,                      # description of the tool
    run_sql_query.args,                             # input arguments of the tool
    run_sql_query.args_schema.model_json_schema(),  # schema of the tool
    sep="\n",
)

In [None]:
# Conversation memory passed to the agent prompt; starts out empty.
chat_history: list = []

In [None]:
# System prompt for the agent. It names the three tools and the order in which
# to use them, limits results to at most 5 rows unless the user asks for a
# specific number, forbids selecting all columns, and gives the fallback reply
# for questions that cannot be answered.
system_msg = """You are a helpful assistant that uses tools to interact with a MySQL database. Create a syntactically correct MySQL query to run, execute the Query and return the final response in natural language back, after proper formatting.
You have access to the following tools, Use the 'list_tables' tools to identify the list of available tables and decide which table to use and then use 'describe_table' tool and based on the results from both generate SQL query and use 'run_sql_query' tool.
1. list_tables: List all available tables. 
2. describe_table: Get the schema of a table. 
3. run_sql_query: Run a raw SQL query. 

Unless the user specifies in the question a specific number of examples to obtain, query for at most 5 results using the LIMIT clause as per MySQL. You can order the results to return the most informative data in the database.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Do not hallicunate and incase you do not find the answer respond with "Sorry I am unable to answer your question".

Below given is the chat history betwen the human and the agent. Also handle the follow up question and use the tools provided if you need ."""

In [None]:
# Trim the history to a sliding window of the most recent messages
def conversational_window_memory(messages, max_messages: int = 2):
    """Keep only the most recent messages of a conversation.

    Args:
        messages: The chat messages to trim.
        max_messages: Maximum number of messages to keep. The default of 2
            corresponds to one human/AI exchange.

    Returns:
        The trimmed messages as returned by ``trim_messages``.
    """
    return trim_messages(
        messages,
        token_counter=len,        # len() of a message list, so the budget counts messages
        max_tokens=max_messages,  # with the counter above this is a message limit
        strategy="last",          # keep the end of the conversation
        start_on="human",         # the kept window must begin with a human message
        include_system=True,
        allow_partial=False,
    )

In [None]:
# Tools made available to the agent.
tools = [list_tables, describe_table, run_sql_query]


def initialize_agent(prompt):
    """Create the agent for ``prompt`` and wrap it in an ``AgentExecutor``.

    Args:
        prompt: The chat prompt passed to ``create_openai_tools_agent``.

    Returns:
        A ``(agent, agent_executor)`` tuple.
    """
    sql_agent = create_openai_tools_agent(llm=llm, tools=tools, prompt=prompt)
    executor = AgentExecutor(agent=sql_agent, tools=tools, verbose=True)
    return sql_agent, executor

In [None]:
## Assembling all the components

user_question = input("Enter the query: ").strip()
if user_question:  # skip empty or whitespace-only input

    # User question + the most similar example retrieved from the vector store
    dynamic_prefix = build_dynamic_prefix(user_question)

    # Prompt layout: system prompt + chat history + human turn + agent scratchpad.
    # The human turn is the template variable {input}; the actual text is
    # supplied when the agent is invoked. Placing the text itself in the
    # template would make any '{' or '}' in the question or in a retrieved
    # example be parsed as a template variable and fail.
    prompt = ChatPromptTemplate.from_messages([
        ("system", system_msg),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ])

    # Initialize the agent and the agent executor with the prompt
    agent, agent_executor = initialize_agent(prompt)

    # Invoke the agent with the question/examples text and the chat history
    response = agent_executor.invoke({
        "input": dynamic_prefix,
        "chat_history": chat_history,
    })
    print(f"User Question: {user_question}")
    print(f"Agent: {response}")

    # Store the raw question (without the examples) and the agent's final answer
    chat_history.append(HumanMessage(content=user_question))
    chat_history.append(AIMessage(content=response['output']))

    # Trim to the last 2 messages (one human/agent exchange)
    chat_history = conversational_window_memory(chat_history)
    



In [None]:
# Recap of the latest turn: the question, the agent's final answer and the current memory.
print(
    f"User Question: {user_question}",
    f"Agent: {response['output']}",
    chat_history,
    sep="\n",
)

In [None]:
# Display the complete result object returned by agent_executor.invoke.
response

### ---------------------------------------------------------------------------------------------------------------------------------