## SQL Agent

In [1]:
from langchain import SQLDatabase
from langchain_experimental.sql.base import SQLDatabaseChain, SQLDatabaseSequentialChain
import os
from langchain_openai import AzureChatOpenAI

In [2]:
from langchain_openai import ChatOpenAI

In [3]:
import mysql.connector
import urllib.parse

# Connection settings for the MySQL "classicmodels" sample database.
# Replace the placeholders with real values before running.
host_name = "<host_name>"
database_username = "<username>"
database_password = "<password>"
database_name = "classicmodels"
port_number = 8502

# safe="" percent-encodes every reserved character, including "/", which the
# default safe="/" would leave untouched and so break the URI structure.
database_password_encoded = urllib.parse.quote(database_password, safe="")
sql_uri = f"mysql+mysqlconnector://{database_username}:{database_password_encoded}@{host_name}:{port_number}/{database_name}"
# Print the connection target with the password masked so the secret is not
# stored in the notebook's saved output.
print(f"mysql+mysqlconnector://{database_username}:***@{host_name}:{port_number}/{database_name}")

mysql+mysqlconnector://<username>:***@<host_name>:8502/classicmodels


In [4]:
from sqlalchemy import create_engine
from sqlalchemy.engine import URL

# LangChain database wrapper built from the connection URI assembled above.
db = SQLDatabase.from_uri(database_uri=sql_uri)

### Langchain Tools

In [6]:
from typing import Optional, Type
from langchain.pydantic_v1 import BaseModel, Field
from langchain.tools import BaseTool, StructuredTool, tool

from langchain.callbacks.manager import (
    AsyncCallbackManagerForToolRun,
    CallbackManagerForToolRun,
)


# Argument schema for the row-search tool: the user's question plus the name
# of the table whose rows should be searched.
class SearchInput(BaseModel):
    query: str = Field(
        description="should be a user query",
    )
    table_name: str = Field(
        description="table name to use in the row search",
    )


# Agent tool that runs a similarity search over one table's vector index.
# The index is looked up by table name in the module-level `table_content`
# mapping at call time, so that mapping must be populated before the agent runs.
class CustomSearchTool(BaseTool):
    # Explicit annotations: overriding inherited model fields without them is
    # rejected by pydantic-v2 based BaseTool versions (harmless on v1).
    name: str = "relevant_rows_search"
    description: str =  """Input to this tool are the user query and the table name to search. the output is the relevant rows from the table.
     Useful to search relevant rows to the user question and understand and validate the data.
     Example Input: 'some user question', 'table2' """
    args_schema: Type[BaseModel] = SearchInput

    def _run(
        self, query: str,table_name:str ) -> str:
        """Return the rows of ``table_name`` most similar to ``query``.

        Args:
            query: text passed to the index's ``similarity_search``.
            table_name: key into ``table_content`` selecting the index.

        Returns:
            ``str()`` of the search results, or a message listing the
            indexed tables when ``table_name`` has no index. Returning text
            instead of raising lets the agent retry with a valid name.
        """
        vector_db = table_content.get(table_name)
        if vector_db is None:
            known_tables = ", ".join(sorted(table_content))
            return (
                f"Table '{table_name}' has no row index. "
                f"Available tables: {known_tables}"
            )
        rows = vector_db.similarity_search(query)
        return str(rows)

relevant_rows_search=CustomSearchTool()

### Azure OpenAI
You can use any LangChain chat model in place of `AzureChatOpenAI`.

In [7]:
# Azure OpenAI settings are taken from the environment. A value is requested
# interactively only when the variable is missing or empty, so credentials that
# are already exported are never overwritten and nothing secret is stored in
# the notebook source.
from getpass import getpass

for _azure_var in ("AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT"):
    if not os.environ.get(_azure_var):
        os.environ[_azure_var] = getpass(f"{_azure_var}: ")


# Chat model served by the Azure deployment named "GPT4".
llm_azure = AzureChatOpenAI(
    temperature=0,
    api_version = "2023-07-01-preview",
    azure_deployment="GPT4",
)

### Database into Embeddings

In [9]:
from langchain_community.document_loaders.sql_database import SQLDatabaseLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

# Build one FAISS vector index per usable table; `table_content` maps
# table name -> index and is what the row-search tool reads from.
embeddings = OpenAIEmbeddings()
table_names = db.get_usable_table_names()
table_content={}

for table in table_names:
    # Load every row of the table as documents.
    db_loader = SQLDatabaseLoader(query=f"select * from {table}",db=db)
    data=db_loader.load()
    if not data:
        # An empty table yields no documents and there is nothing to embed;
        # skip it so one empty table does not abort the whole build.
        continue
    db_content = FAISS.from_documents(data, embeddings)
    table_content[table]=db_content

### SQL Agent

In [14]:
from langchain.callbacks.base import BaseCallbackHandler
from langchain_experimental.sql.base import SQLDatabaseChain, SQLDatabaseSequentialChain
from langchain_core.output_parsers.list import CommaSeparatedListOutputParser
from langchain_core.prompts.prompt import PromptTemplate

class SQLHandler(BaseCallbackHandler):
    """Callback handler that collects the inputs the agent sends to SQL tools.

    Every input passed to ``sql_db_query_checker`` or ``sql_db_query`` is
    appended, in call order, to ``sql_result``.
    """

    def __init__(self):
        # Captured tool inputs, oldest first.
        self.sql_result = []

    def on_agent_action(self, action, **kwargs):
        """Record ``action.tool_input`` when the action targets a SQL tool."""
        is_sql_tool = (
            action.tool == "sql_db_query_checker"
            or action.tool == "sql_db_query"
        )
        if not is_sql_tool:
            return
        self.sql_result.append(action.tool_input)

from langchain.agents import create_sql_agent
from langchain.agents.agent_types import AgentType
from langchain.tools import tool, Tool

# Extra guidance for the agent: inspect tables, read schemas, then search
# relevant rows before writing the query.
suffix = """I should look at the tables in the database to see what I can query. I should query the schema of the most relevant tables. Then I should search for the relevant rows before generating the query"""

# SQL agent over `db`, extended with the row-search tool. It uses `llm_azure`,
# the chat model this notebook actually defines.
agent = create_sql_agent(
    llm=llm_azure,
    db=db,
    agent_type=AgentType.OPENAI_FUNCTIONS,
    verbose=False,
    handle_parsing_errors=True,
    extra_tools=[relevant_rows_search],
    suffix=suffix
)





### Inference

In [15]:
import time
import pandas as pd

# One row per benchmark question: the prompt, the agent's answer, the SQL it
# ran, and the mean wall-clock time per run.
df_benchmark = pd.DataFrame(
    columns=[
        'question',
        'answer',
        'sql',
        'time',
    ]
)


In [16]:
user_query = "Retrieve the columns names from the 'customers' table."

add_times = 0
n_test = 1
for _ in range(n_test):
    # Fresh handler per run so only this run's SQL tool calls are recorded.
    handler = SQLHandler()
    start = time.perf_counter()
    response = agent.invoke({'input': user_query}, {'callbacks': [handler]})
    end = time.perf_counter()
    add_times += end - start

print('time: ', add_times / n_test)
print('Model Answer: \n')
print(response['output'])

# First SQL tool input recorded during the last run, or a placeholder when
# the agent answered without calling a SQL tool.
sql_queries = handler.sql_result
sql_ = sql_queries[0] if sql_queries else 'no query'

print('\nSQL: \n')
print(sql_)

# Append this question's result as a new benchmark row.
row = [user_query, response['output'], sql_, add_times / n_test]
df_benchmark.loc[len(df_benchmark)] = row

time:  5.200426816940308
Model Answer: 

The 'customers' table has the following columns:
1. customerNumber
2. customerName
3. contactLastName
4. contactFirstName
5. phone
6. addressLine1
7. addressLine2
8. city
9. state
10. postalCode
11. country
12. salesRepEmployeeNumber
13. creditLimit

SQL: 

no query


In [17]:
user_query = "how many customers are per country?"

add_times = 0
n_test = 1
for _ in range(n_test):
    # Fresh handler per run so only this run's SQL tool calls are recorded.
    handler = SQLHandler()
    start = time.perf_counter()
    response = agent.invoke({'input': user_query}, {'callbacks': [handler]})
    end = time.perf_counter()
    add_times += end - start

print('time: ', add_times / n_test)
print('Model Answer: \n')
print(response['output'])

# First SQL tool input recorded during the last run, or a placeholder when
# the agent answered without calling a SQL tool.
sql_queries = handler.sql_result
sql_ = sql_queries[0] if sql_queries else 'no query'

print('\nSQL: \n')
print(sql_)

# Append this question's result as a new benchmark row.
row = [user_query, response['output'], sql_, add_times / n_test]
df_benchmark.loc[len(df_benchmark)] = row

time:  7.794758081436157
Model Answer: 

The number of customers per country are as follows:
- France: 12 customers
- USA: 36 customers
- Australia: 5 customers
- Germany: 13 customers
- Spain: 7 customers
- Sweden: 2 customers
- Denmark: 2 customers
- Singapore: 3 customers
- Portugal: 2 customers
- Japan: 2 customers

And more countries with varying numbers of customers.

SQL: 

SELECT country, COUNT(*) AS customer_count FROM customers GROUP BY country


In [18]:
user_query = "List the 10 lasts orders pair with the customer name and order date."

add_times = 0
n_test = 1
for _ in range(n_test):
    # Fresh handler per run so only this run's SQL tool calls are recorded.
    handler = SQLHandler()
    start = time.perf_counter()
    response = agent.invoke({'input': user_query}, {'callbacks': [handler]})
    end = time.perf_counter()
    add_times += end - start

print('time: ', add_times / n_test)
print('Model Answer: \n')
print(response['output'])

# First SQL tool input recorded during the last run, or a placeholder when
# the agent answered without calling a SQL tool.
sql_queries = handler.sql_result
sql_ = sql_queries[0] if sql_queries else 'no query'

print('\nSQL: \n')
print(sql_)

# Append this question's result as a new benchmark row.
row = [user_query, response['output'], sql_, add_times / n_test]
df_benchmark.loc[len(df_benchmark)] = row

time:  11.86038875579834
Model Answer: 

The 10 last orders paired with the customer name and order date are as follows:

1. Customer Name: Euro+ Shopping Channel, Order Date: 2005-05-31
2. Customer Name: La Rochelle Gifts, Order Date: 2005-05-31
3. Customer Name: Diecast Classics Inc., Order Date: 2005-05-30
4. Customer Name: Petit Auto, Order Date: 2005-05-30
5. Customer Name: Souveniers And Things Co., Order Date: 2005-05-29
6. Customer Name: Mini Gifts Distributors Ltd., Order Date: 2005-05-29
7. Customer Name: Salzburg Collectables, Order Date: 2005-05-17
8. Customer Name: Extreme Desk Decorations, Ltd, Order Date: 2005-05-16
9. Customer Name: Euro+ Shopping Channel, Order Date: 2005-05-13
10. Customer Name: L'ordine Souveniers, Order Date: 2005-05-10

SQL: 

SELECT customers.customerName, orders.orderDate FROM customers JOIN orders ON customers.customerNumber = orders.customerNumber ORDER BY orders.orderDate DESC LIMIT 10


In [19]:
user_query = "Find the total amount spent by the top 10 customer."

add_times = 0
n_test = 1
for _ in range(n_test):
    # Fresh handler per run so only this run's SQL tool calls are recorded.
    handler = SQLHandler()
    start = time.perf_counter()
    response = agent.invoke({'input': user_query}, {'callbacks': [handler]})
    end = time.perf_counter()
    add_times += end - start

print('time: ', add_times / n_test)
print('Model Answer: \n')
print(response['output'])

# First SQL tool input recorded during the last run, or a placeholder when
# the agent answered without calling a SQL tool.
sql_queries = handler.sql_result
sql_ = sql_queries[0] if sql_queries else 'no query'

print('\nSQL: \n')
print(sql_)

# Append this question's result as a new benchmark row.
row = [user_query, response['output'], sql_, add_times / n_test]
df_benchmark.loc[len(df_benchmark)] = row

time:  11.466569900512695
Model Answer: 

The total amount spent by the top 10 customers are as follows:
1. Euro+ Shopping Channel: $715,738.98
2. Mini Gifts Distributors Ltd.: $584,188.24
3. Australian Collectors, Co.: $180,585.07
4. Muscle Machine Inc: $177,913.95
5. Dragon Souveniers, Ltd.: $156,251.03
6. Down Under Souveniers, Inc: $154,622.08
7. AV Stores, Co.: $148,410.09
8. Anna's Decorations, Ltd: $137,034.22
9. Corporate Gift Ideas Co.: $132,340.78
10. Saveley & Henriot, Co.: $130,305.35

SQL: 

SELECT c.customerNumber, c.customerName, SUM(p.amount) AS total_amount_spent
FROM customers c
JOIN payments p ON c.customerNumber = p.customerNumber
GROUP BY c.customerNumber, c.customerName
ORDER BY total_amount_spent DESC
LIMIT 10


In [20]:
user_query = "Show the total quantity of each product ordered by customers along with their corresponding product codes and names."

add_times = 0
n_test = 1
for _ in range(n_test):
    # Fresh handler per run so only this run's SQL tool calls are recorded.
    handler = SQLHandler()
    start = time.perf_counter()
    response = agent.invoke({'input': user_query}, {'callbacks': [handler]})
    end = time.perf_counter()
    add_times += end - start

print('time: ', add_times / n_test)
print('Model Answer: \n')
print(response['output'])

# First SQL tool input recorded during the last run, or a placeholder when
# the agent answered without calling a SQL tool.
sql_queries = handler.sql_result
sql_ = sql_queries[0] if sql_queries else 'no query'

print('\nSQL: \n')
print(sql_)

# Append this question's result as a new benchmark row.
row = [user_query, response['output'], sql_, add_times / n_test]
df_benchmark.loc[len(df_benchmark)] = row

time:  16.925842761993408
Model Answer: 

Here are the total quantities of each product ordered by customers along with their corresponding product codes and names:

1. Product Code: S18_3232, Product Name: 1992 Ferrari 360 Spider red, Total Quantity: 1808
2. Product Code: S18_1342, Product Name: 1937 Lincoln Berline, Total Quantity: 1111
3. Product Code: S700_4002, Product Name: American Airlines: MD-11S, Total Quantity: 1085
4. Product Code: S18_3856, Product Name: 1941 Chevrolet Special Deluxe Cabriolet, Total Quantity: 1076
5. Product Code: S50_1341, Product Name: 1930 Buick Marquette Phaeton, Total Quantity: 1074
6. Product Code: S18_4600, Product Name: 1940s Ford truck, Total Quantity: 1061
7. Product Code: S10_1678, Product Name: 1969 Harley Davidson Ultimate Chopper, Total Quantity: 1057
8. Product Code: S12_4473, Product Name: 1957 Chevy Pickup, Total Quantity: 1056
9. Product Code: S18_2319, Product Name: 1964 Mercedes Tour Bus, Total Quantity: 1053
10. Product Code: S24_3856

In [21]:
user_query = "How does the average payment amount vary based on the customer's credit limit?"

add_times = 0
n_test = 1
for _ in range(n_test):
    # Fresh handler per run so only this run's SQL tool calls are recorded.
    handler = SQLHandler()
    start = time.perf_counter()
    response = agent.invoke({'input': user_query}, {'callbacks': [handler]})
    end = time.perf_counter()
    add_times += end - start

print('time: ', add_times / n_test)
print('Model Answer: \n')
print(response['output'])

# First SQL tool input recorded during the last run, or a placeholder when
# the agent answered without calling a SQL tool.
sql_queries = handler.sql_result
sql_ = sql_queries[0] if sql_queries else 'no query'

print('\nSQL: \n')
print(sql_)

# Append this question's result as a new benchmark row.
row = [user_query, response['output'], sql_, add_times / n_test]
df_benchmark.loc[len(df_benchmark)] = row

time:  10.61057424545288
Model Answer: 

The average payment amount varies based on the customer's credit limit. Here are some examples of the average payment amount for different credit limits:

- For a credit limit of $21,000, the average payment amount is $7,438.12.
- For a credit limit of $71,800, the average payment amount is $26,726.99.
- For a credit limit of $117,300, the average payment amount is $45,146.27.
- For a credit limit of $118,200, the average payment amount is $38,983.23.
- For a credit limit of $81,700, the average payment amount is $26,056.20.

These are just a few examples of how the average payment amount varies based on the customer's credit limit.

SQL: 

SELECT c.creditLimit, AVG(p.amount) AS average_payment_amount FROM customers c JOIN payments p ON c.customerNumber = p.customerNumber GROUP BY c.creditLimit


In [22]:
user_query = "Retrieve the names of employees and their managers from San Francisco"

add_times = 0
n_test = 1
for _ in range(n_test):
    # Fresh handler per run so only this run's SQL tool calls are recorded.
    handler = SQLHandler()
    start = time.perf_counter()
    response = agent.invoke({'input': user_query}, {'callbacks': [handler]})
    end = time.perf_counter()
    add_times += end - start

print('time: ', add_times / n_test)
print('Model Answer: \n')
print(response['output'])

# First SQL tool input recorded during the last run, or a placeholder when
# the agent answered without calling a SQL tool.
sql_queries = handler.sql_result
sql_ = sql_queries[0] if sql_queries else 'no query'

print('\nSQL: \n')
print(sql_)

# Append this question's result as a new benchmark row.
row = [user_query, response['output'], sql_, add_times / n_test]
df_benchmark.loc[len(df_benchmark)] = row

time:  9.264816284179688
Model Answer: 

The names of employees and their managers from San Francisco are as follows:
1. Employee: Mary Patterson, Manager: Diane Murphy
2. Employee: Jeff Firrelli, Manager: Diane Murphy
3. Employee: Anthony Bow, Manager: Mary Patterson
4. Employee: Leslie Jennings, Manager: Anthony Bow
5. Employee: Leslie Thompson, Manager: Anthony Bow

SQL: 

SELECT e.firstName, e.lastName, m.firstName AS managerFirstName, m.lastName AS managerLastName FROM employees e JOIN employees m ON e.reportsTo = m.employeeNumber JOIN offices o ON e.officeCode = o.officeCode WHERE o.city = 'San Francisco'


In [23]:
user_query = "List the employee names and their corresponding office cities for employees who have customers located in the USA."

add_times = 0
n_test = 1
for _ in range(n_test):
    # Fresh handler per run so only this run's SQL tool calls are recorded.
    handler = SQLHandler()
    start = time.perf_counter()
    response = agent.invoke({'input': user_query}, {'callbacks': [handler]})
    end = time.perf_counter()
    add_times += end - start

print('time: ', add_times / n_test)
print('Model Answer: \n')
print(response['output'])

# First SQL tool input recorded during the last run, or a placeholder when
# the agent answered without calling a SQL tool.
sql_queries = handler.sql_result
sql_ = sql_queries[0] if sql_queries else 'no query'

print('\nSQL: \n')
print(sql_)

# Append this question's result as a new benchmark row.
row = [user_query, response['output'], sql_, add_times / n_test]
df_benchmark.loc[len(df_benchmark)] = row

time:  11.83650517463684
Model Answer: 

The employee names and their corresponding office cities for employees who have customers located in the USA are as follows:

1. Employee: Leslie Thompson, Office City: San Francisco
2. Employee: Leslie Jennings, Office City: San Francisco
3. Employee: George Vanauf, Office City: NYC
4. Employee: Foon Yue Tseng, Office City: NYC
5. Employee: Steve Patterson, Office City: Boston
6. Employee: Julie Firrelli, Office City: Boston

These are some of the employees who have customers located in the USA along with their office cities.

SQL: 

SELECT e.firstName, e.lastName, o.city FROM employees e JOIN offices o ON e.officeCode = o.officeCode JOIN customers c ON e.employeeNumber = c.salesRepEmployeeNumber WHERE c.country = 'USA'


In [24]:
user_query = "List the product lines with the total number of products in each line, showing only those product lines having more than 5 products."

add_times = 0
n_test = 1
for _ in range(n_test):
    # Fresh handler per run so only this run's SQL tool calls are recorded.
    handler = SQLHandler()
    start = time.perf_counter()
    response = agent.invoke({'input': user_query}, {'callbacks': [handler]})
    end = time.perf_counter()
    add_times += end - start

print('time: ', add_times / n_test)
print('Model Answer: \n')
print(response['output'])

# First SQL tool input recorded during the last run, or a placeholder when
# the agent answered without calling a SQL tool.
sql_queries = handler.sql_result
sql_ = sql_queries[0] if sql_queries else 'no query'

print('\nSQL: \n')
print(sql_)

# Append this question's result as a new benchmark row.
row = [user_query, response['output'], sql_, add_times / n_test]
df_benchmark.loc[len(df_benchmark)] = row

time:  10.268896102905273
Model Answer: 

The product lines with the total number of products in each line, showing only those product lines having more than 5 products are:
- Classic Cars: 38 products
- Motorcycles: 13 products
- Planes: 12 products
- Ships: 9 products
- Trucks and Buses: 11 products
- Vintage Cars: 24 products

SQL: 

SELECT productLine, COUNT(*) AS totalProducts FROM products GROUP BY productLine HAVING COUNT(*) > 5


In [25]:
user_query = "Display 10 product details along with their product line and supplier information."

add_times = 0
n_test = 1
for _ in range(n_test):
    # Fresh handler per run so only this run's SQL tool calls are recorded.
    handler = SQLHandler()
    start = time.perf_counter()
    response = agent.invoke({'input': user_query}, {'callbacks': [handler]})
    end = time.perf_counter()
    add_times += end - start

print('time: ', add_times / n_test)
print('Model Answer: \n')
print(response['output'])

# First SQL tool input recorded during the last run, or a placeholder when
# the agent answered without calling a SQL tool.
sql_queries = handler.sql_result
sql_ = sql_queries[0] if sql_queries else 'no query'

print('\nSQL: \n')
print(sql_)

# Append this question's result as a new benchmark row.
row = [user_query, response['output'], sql_, add_times / n_test]
df_benchmark.loc[len(df_benchmark)] = row

time:  23.76386332511902
Model Answer: 

Here are 10 product details along with their product line and supplier information:

1. Product Name: 1952 Alpine Renault 1300
   - Product Line: Classic Cars
   - Product Vendor: Classic Metal Creations
   - Product Line Description: Attention car enthusiasts: Make your wildest car ownership dreams come true. Whether you are looking for classic muscle cars, dream sports cars or movie-inspired miniatures, you will find great choices in this category. These replicas feature superb attention to detail and craftsmanship.

2. Product Name: 1972 Alfa Romeo GTA
   - Product Line: Classic Cars
   - Product Vendor: Motor City Art Classics
   - Product Line Description: Attention car enthusiasts: Make your wildest car ownership dreams come true. Whether you are looking for classic muscle cars, dream sports cars or movie-inspired miniatures, you will find great choices in this category. These replicas feature superb attention to detail and craftsmanship.


In [26]:
user_query = "How many orders has each customer placed, and what is the average order value?"

add_times = 0
n_test = 1
for _ in range(n_test):
    # Fresh handler per run so only this run's SQL tool calls are recorded.
    handler = SQLHandler()
    start = time.perf_counter()
    response = agent.invoke({'input': user_query}, {'callbacks': [handler]})
    end = time.perf_counter()
    add_times += end - start

print('time: ', add_times / n_test)
print('Model Answer: \n')
print(response['output'])

# First SQL tool input recorded during the last run, or a placeholder when
# the agent answered without calling a SQL tool.
sql_queries = handler.sql_result
sql_ = sql_queries[0] if sql_queries else 'no query'

print('\nSQL: \n')
print(sql_)

# Append this question's result as a new benchmark row.
row = [user_query, response['output'], sql_, add_times / n_test]
df_benchmark.loc[len(df_benchmark)] = row

time:  17.49903440475464
Model Answer: 

Here are the results of the query:

- Customer: Atelier graphique
  - Total Orders: 7
  - Average Order Value: $3187.77

- Customer: Signal Gift Stores
  - Total Orders: 29
  - Average Order Value: $2764.86

- Customer: Australian Collectors, Co.
  - Total Orders: 55
  - Average Order Value: $3283.36

- Customer: La Rochelle Gifts
  - Total Orders: 53
  - Average Order Value: $2991.95

- Customer: Baane Mini Imports
  - Total Orders: 32
  - Average Order Value: $3257.02

- Customer: Mini Gifts Distributors Ltd.
  - Total Orders: 180
  - Average Order Value: $3287.93

- Customer: Blauer See Auto, Co.
  - Total Orders: 22
  - Average Order Value: $3451.72

- Customer: Mini Wheels Co.
  - Total Orders: 21
  - Average Order Value: $3176.69

- Customer: Land of Toys Inc.
  - Total Orders: 49
  - Average Order Value: $3042.55

- Customer: Euro+ Shopping Channel
  - Total Orders: 259
  - Average Order Value: $3168.69

These are the number of orders pla

In [27]:
user_query = "Show the customer name, order date, product name, and quantity ordered for the last 10 orders."

add_times = 0
n_test = 1
for _ in range(n_test):
    # Fresh handler per run so only this run's SQL tool calls are recorded.
    handler = SQLHandler()
    start = time.perf_counter()
    response = agent.invoke({'input': user_query}, {'callbacks': [handler]})
    end = time.perf_counter()
    add_times += end - start

print('time: ', add_times / n_test)
print('Model Answer: \n')
print(response['output'])

# First SQL tool input recorded during the last run, or a placeholder when
# the agent answered without calling a SQL tool.
sql_queries = handler.sql_result
sql_ = sql_queries[0] if sql_queries else 'no query'

print('\nSQL: \n')
print(sql_)

# Append this question's result as a new benchmark row.
row = [user_query, response['output'], sql_, add_times / n_test]
df_benchmark.loc[len(df_benchmark)] = row

time:  17.388266563415527
Model Answer: 

The query results show the customer name, order date, product name, and quantity ordered for the last 10 orders:

1. Customer Name: Euro+ Shopping Channel, Order Date: 2005-05-31, Product Name: 1952 Alpine Renault 1300, Quantity Ordered: 50
2. Customer Name: Euro+ Shopping Channel, Order Date: 2005-05-31, Product Name: 1958 Setra Bus, Quantity Ordered: 49
3. Customer Name: Euro+ Shopping Channel, Order Date: 2005-05-31, Product Name: 1940 Ford Pickup Truck, Quantity Ordered: 54
4. Customer Name: Euro+ Shopping Channel, Order Date: 2005-05-31, Product Name: 1939 Cadillac Limousine, Quantity Ordered: 26
5. Customer Name: Euro+ Shopping Channel, Order Date: 2005-05-31, Product Name: 1996 Peterbilt 379 Stake Bed with Outrigger, Quantity Ordered: 44
6. Customer Name: Euro+ Shopping Channel, Order Date: 2005-05-31, Product Name: 1982 Camaro Z28, Quantity Ordered: 46
7. Customer Name: La Rochelle Gifts, Order Date: 2005-05-31, Product Name: 1962 Lanci

In [28]:
user_query = "Retrieve the product names and descriptions for the 5 products that have been ordered the most by customers located in the USA."

add_times = 0
n_test = 1
for _ in range(n_test):
    # Fresh handler per run so only this run's SQL tool calls are recorded.
    handler = SQLHandler()
    start = time.perf_counter()
    response = agent.invoke({'input': user_query}, {'callbacks': [handler]})
    end = time.perf_counter()
    add_times += end - start

print('time: ', add_times / n_test)
print('Model Answer: \n')
print(response['output'])

# First SQL tool input recorded during the last run, or a placeholder when
# the agent answered without calling a SQL tool.
sql_queries = handler.sql_result
sql_ = sql_queries[0] if sql_queries else 'no query'

print('\nSQL: \n')
print(sql_)

# Append this question's result as a new benchmark row.
row = [user_query, response['output'], sql_, add_times / n_test]
df_benchmark.loc[len(df_benchmark)] = row

time:  17.77046537399292
Model Answer: 

The product names and descriptions for the 5 products that have been ordered the most by customers located in the USA are as follows:

1. Product Name: 1992 Ferrari 360 Spider red
   Description: This replica features opening doors, superb detail and craftsmanship, working steering system, opening forward compartment, opening rear trunk with removable spare, 4 wheel independent spring suspension as well as factory baked enamel finish.
   Total Orders: 16

2. Product Name: 2002 Suzuki XREO
   Description: Official logos and insignias, saddle bags located on side of motorcycle, detailed engine, working steering, working suspension, two leather seats, luggage rack, dual exhaust pipes, small saddle bag located on handle bars, two-tone paint with chrome accents, superior die-cast detail , rotating...
   Total Orders: 14

3. Product Name: 1957 Chevy Pickup
   Description: 1:12 scale die-cast about 20" long Hood opens, Rubber wheels
   Total Orders: 14

In [29]:
user_query = "Find the number of customers who have placed more than one order in USA"

add_times = 0
n_test = 1
for _ in range(n_test):
    # Fresh handler per run so only this run's SQL tool calls are recorded.
    handler = SQLHandler()
    start = time.perf_counter()
    response = agent.invoke({'input': user_query}, {'callbacks': [handler]})
    end = time.perf_counter()
    add_times += end - start

print('time: ', add_times / n_test)
print('Model Answer: \n')
print(response['output'])

# First SQL tool input recorded during the last run, or a placeholder when
# the agent answered without calling a SQL tool.
sql_queries = handler.sql_result
sql_ = sql_queries[0] if sql_queries else 'no query'

print('\nSQL: \n')
print(sql_)

# Append this question's result as a new benchmark row.
row = [user_query, response['output'], sql_, add_times / n_test]
df_benchmark.loc[len(df_benchmark)] = row

time:  8.342421054840088
Model Answer: 

There are 97 customers who have placed more than one order in the USA.

SQL: 

SELECT COUNT(DISTINCT customerNumber) AS num_customers FROM orders WHERE customerNumber IN (SELECT customerNumber FROM orders GROUP BY customerNumber HAVING COUNT(*) > 1)


In [30]:
user_query = "Calculate the average number of days it takes to ship each product from the order date to the shipped date."

add_times = 0
n_test = 1
for _ in range(n_test):
    # Fresh handler per run so only this run's SQL tool calls are recorded.
    handler = SQLHandler()
    start = time.perf_counter()
    response = agent.invoke({'input': user_query}, {'callbacks': [handler]})
    end = time.perf_counter()
    add_times += end - start

print('time: ', add_times / n_test)
print('Model Answer: \n')
print(response['output'])

# First SQL tool input recorded during the last run, or a placeholder when
# the agent answered without calling a SQL tool.
sql_queries = handler.sql_result
sql_ = sql_queries[0] if sql_queries else 'no query'

print('\nSQL: \n')
print(sql_)

# Append this question's result as a new benchmark row.
row = [user_query, response['output'], sql_, add_times / n_test]
df_benchmark.loc[len(df_benchmark)] = row

time:  8.500002145767212
Model Answer: 

The average number of days it takes to ship each product from the order date to the shipped date is approximately 3.76 days.

SQL: 

SELECT AVG(DATEDIFF(shippedDate, orderDate)) AS avg_shipping_days FROM orders WHERE shippedDate IS NOT NULL


In [31]:
user_query = "Show me the total amount sold comparison between the product line Cars vs Trains"

add_times = 0
n_test = 1
for _ in range(n_test):
    # Fresh handler per run so only this run's SQL tool calls are recorded.
    handler = SQLHandler()
    start = time.perf_counter()
    response = agent.invoke({'input': user_query}, {'callbacks': [handler]})
    end = time.perf_counter()
    add_times += end - start

print('time: ', add_times / n_test)
print('Model Answer: \n')
print(response['output'])

# First SQL tool input recorded during the last run, or a placeholder when
# the agent answered without calling a SQL tool.
sql_queries = handler.sql_result
sql_ = sql_queries[0] if sql_queries else 'no query'

print('\nSQL: \n')
print(sql_)

# Append this question's result as a new benchmark row.
row = [user_query, response['output'], sql_, add_times / n_test]
df_benchmark.loc[len(df_benchmark)] = row

time:  11.635133504867554
Model Answer: 

The total amount sold for the product line "Trains" is $188,532.92. Unfortunately, there is no data available for the product line "Cars" in the database.

SQL: 

SELECT p.productLine, SUM(od.quantityOrdered * od.priceEach) AS total_amount_sold FROM products p JOIN orderdetails od ON p.productCode = od.productCode WHERE p.productLine IN ('Cars', 'Trains') GROUP BY p.productLine


In [32]:
user_query = "Who are the Sales Representative from US?"

add_times = 0
n_test = 1
for _ in range(n_test):
    # Fresh handler per run so only this run's SQL tool calls are recorded.
    handler = SQLHandler()
    start = time.perf_counter()
    response = agent.invoke({'input': user_query}, {'callbacks': [handler]})
    end = time.perf_counter()
    add_times += end - start

print('time: ', add_times / n_test)
print('Model Answer: \n')
print(response['output'])

# First SQL tool input recorded during the last run, or a placeholder when
# the agent answered without calling a SQL tool.
sql_queries = handler.sql_result
sql_ = sql_queries[0] if sql_queries else 'no query'

print('\nSQL: \n')
print(sql_)

# Append this question's result as a new benchmark row.
row = [user_query, response['output'], sql_, add_times / n_test]
df_benchmark.loc[len(df_benchmark)] = row

time:  9.167347192764282
Model Answer: 

The Sales Representatives from the US are:
1. Leslie Jennings
2. Leslie Thompson
3. Julie Firrelli
4. Steve Patterson
5. Foon Yue Tseng
6. George Vanauf

SQL: 

SELECT e.firstName, e.lastName FROM employees e JOIN customers c ON e.employeeNumber = c.salesRepEmployeeNumber WHERE e.jobTitle = 'Sales Rep' AND c.country = 'USA'


In [33]:
# Persist the collected benchmark rows next to the notebook.
df_benchmark.to_csv(path_or_buf='langchain-4.csv')