# NL2SQL leveraging the Azure Accelerator

In [None]:
# import sys
# print(sys.executable)

# # !pip list

# import pkgutil
# import langchain

# # List all modules and sub-packages within 'langchain'
# package = langchain
# for importer, modname, ispkg in pkgutil.iter_modules(package.__path__):
#     print(f"{modname}: Is a package? {ispkg}")

In [19]:
import os
import pandas as pd
import pyodbc
from langchain_openai import AzureChatOpenAI
from langchain_community.agent_toolkits import create_sql_agent, SQLDatabaseToolkit
# from langchain.agents import create_sql_agent 
from langchain.agents import initialize_agent
from langchain.agents import Tool
from langchain.agents import create_sql_agent  # No longer in agent_toolkits
from langchain.sql_database import SQLDatabase  # Toolkit is replaced with SQLDatabase
from langchain.agents import AgentExecutor
from langchain.callbacks.manager import CallbackManager
from common.prompts import MSSQL_AGENT_PREFIX
from IPython.display import Markdown, HTML, display  
from dotenv import load_dotenv

load_dotenv("credentials.env")

def printmd(string):
    display(Markdown(string))

printmd("##### Depedencies loaded...")

##### Depedencies loaded...

In [20]:
# Set environment variables
app_env = os.environ["AZURE_OPENAI_API_VERSION"]

print(f"app_env: {app_env}")

app_env: 2023-12-01-preview


In [21]:
# Configure Logging
import logging

# Define a custom log format
log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

# logging.basicConfig(level=logging.DEBUG)

logging.basicConfig(level=logging.DEBUG, format=log_format)
logger = logging.getLogger('langchain')

printmd("##### Logger created...")

##### Logger created...

In [22]:
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL
import os

# Configuration for the database connection
db_config = {
    'drivername': 'mssql+pyodbc',
    'username': os.environ["SQL_SERVER_USERNAME"] + '@' + os.environ["SQL_SERVER_NAME"],
    'password': os.environ["SQL_SERVER_PASSWORD"],
    'host': os.environ["SQL_SERVER_NAME"],
    'port': 1433,
    'database': os.environ["SQL_SERVER_DATABASE"],
    'query': {'driver': 'ODBC Driver 17 for SQL Server'},
}

# Create a URL object for connecting to the database
db_url = URL.create(**db_config)

# Connect to the Azure SQL Database using the URL string
engine = create_engine(db_url)

# Test the connection using the SQLAlchemy 2.0 execution style
with engine.connect() as conn:
    try:
        # Use the text() construct for safer SQL execution
        result = conn.execute(text("SELECT @@VERSION"))
        version = result.fetchone()
        print("Connection successful!")
        print(version)
    except Exception as e:
        print(e)

Connection successful!
('Microsoft SQL Azure (RTM) - 12.0.2000.8 \n\tJun 19 2024 16:01:48 \n\tCopyright (C) 2022 Microsoft Corporation\n',)


In [23]:
# # Read CSV file into a pandas dataframe

# csv_path = "./data/all-states-history.csv"
# df = pd.read_csv(csv_path).fillna(value = 0)

# # Infer column names and data types
# column_names = df.columns.tolist()
# column_types = df.dtypes.to_dict()

# # Generate SQL statement to create table
# table_name = 'covidtracking'

# create_table_sql = f"CREATE TABLE {table_name} ("
# for name, dtype in column_types.items():
#     if dtype == 'object':
#         create_table_sql += f"{name} VARCHAR(MAX), "
#     elif dtype == 'int64':
#         create_table_sql += f"{name} INT, "
#     elif dtype == 'float64':
#         create_table_sql += f"{name} FLOAT, "
#     elif dtype == 'bool':
#         create_table_sql += f"{name} BIT, "
#     elif dtype == 'datetime64[ns]':
#         create_table_sql += f"{name} DATETIME, "
# create_table_sql = create_table_sql[:-2] + ")"

# try:
#     #Createse the table in Azure SQL
#     with engine.connect() as conn:
#         # Execute the create table SQL statement
#         conn.execute(text(create_table_sql))
#         print("Table", table_name, "successfully created")
#     # Insert data into SQL Database
#     lower = 0
#     upper = 1000
#     limit = df.shape[0]

#     while lower < limit:
#         df[lower:upper].to_sql(table_name, con=engine, if_exists='append', index=False)
#         print("rows:", lower, "-", upper, "inserted")
#         lower = upper
#         upper = min(upper + 1000, limit)

# except Exception as e:
#     print(e)

In [24]:
llm = AzureChatOpenAI(deployment_name=os.environ["GPT35_DEPLOYMENT_NAME"], temperature=0.5, max_tokens=2000, api_version=os.environ["AZURE_OPENAI_API_VERSION"])
# llm = AzureChatOpenAI(deployment_name=os.environ["AZURE_OPENAI_API_VERSION"], temperature=0.5, max_tokens=2000)

print(llm)

2024-07-13 19:31:49,176 - httpx - DEBUG - load_ssl_context verify=True cert=None trust_env=True http2=False
2024-07-13 19:31:49,180 - httpx - DEBUG - load_verify_locations cafile='C:\\Users\\robvet\\AppData\\Local\\miniconda3\\Library\\ssl\\cacert.pem'
2024-07-13 19:31:49,553 - httpx - DEBUG - load_ssl_context verify=True cert=None trust_env=True http2=False
2024-07-13 19:31:49,555 - httpx - DEBUG - load_verify_locations cafile='C:\\Users\\robvet\\AppData\\Local\\miniconda3\\Library\\ssl\\cacert.pem'


client=<openai.resources.chat.completions.Completions object at 0x0000025044871280> async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x0000025044857B30> temperature=0.5 openai_api_key=SecretStr('**********') openai_proxy='' max_tokens=2000 azure_endpoint='https://ai-west-openai.openai.azure.com/' deployment_name='gpt4-o' openai_api_version='2023-12-01-preview' openai_api_type='azure'


In [25]:
# Let's create the db object
db = SQLDatabase.from_uri(db_url)

# print(db.get_table_names())
print("SqlAgent Created")
print("Found following tables:")
print(db.get_usable_table_names())

SqlAgent Created
Found following tables:
['Artists', 'Conditions', 'Descriptions', 'Genres', 'Mediums', 'Products', 'Status', 'sysdiagrams']


In [8]:
# Natural Language question (query)
QUESTION = """
How may patients were hospitalized during July 2020 in Texas. 
And nationwide as the total of all states? 
Use the hospitalizedIncrease column
"""
printmd(f"{QUESTION}")


How may patients were hospitalized during July 2020 in Texas. 
And nationwide as the total of all states? 
Use the hospitalizedIncrease column


In [26]:
# Natural Language question (query)
QUESTION = """
How many products are available in the inventory?
"""
printmd(f"{QUESTION}")


How many products are available in the inventory?


In [35]:
# Natural Language question (query)
QUESTION = """
List the all of the rock products
"""
printmd(f"{QUESTION}")


List the all of the rock products


In [None]:
# Natural Language question (query)
QUESTION = """
What products include Bruce Springsteen?
"""
printmd(f"{QUESTION}")

In [36]:
# Create Callback Manager to capture SQL queries
from langchain.callbacks.base import BaseCallbackHandler

class SQLCallbackHandler(BaseCallbackHandler):
    def on_tool_end(self, output, color="green", **kwargs):
        """Run when tool ends running."""
        print(f"SQL Query: {output}")

In [37]:
toolkit = SQLDatabaseToolkit(db=db, llm=llm)

try:
    agent_executor = create_sql_agent(
        prefix=MSSQL_AGENT_PREFIX,
        llm=llm,
        toolkit=toolkit,
        top_k=30,
        agent_type="openai-tools",
        verbose=True,
        callback_manager=CallbackManager([SQLCallbackHandler()]),
        agent_executor_kwargs={"return_intermediate_steps": True},
    )
except Exception as e:
    print(f"An error occurred while creating the agent_executor: {str(e)}")
    agent_executor = None  # Set agent_executor to None or handle it as needed

# Continue with the rest of your code, checking if agent_executor was created successfully
if agent_executor:
    printmd(f"##### Agent Executor Created")

##### Agent Executor Created

In [38]:
# As we know by now, Agents use expert/tools. Let's see which are the tools for this SQL Agent
agent_executor.tools

[QuerySQLDataBaseTool(description="Input to this tool is a detailed and correct SQL query, output is a result from the database. If the query is not correct, an error message will be returned. If an error is returned, rewrite the query, check the query, and try again. If you encounter an issue with Unknown column 'xxxx' in 'field list', use sql_db_schema to query the correct table fields.", db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x00000250448563F0>),
 InfoSQLDatabaseTool(description='Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables. Be sure that the tables actually exist by calling sql_db_list_tables first! Example Input: table1, table2, table3', db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x00000250448563F0>),
 ListSQLDatabaseTool(db=<langchain_community.utilities.sql_database.SQLDatabase object at 0x00000250448563F0>),
 QuerySQLCheckerTool(description='Use this tool to 

In [39]:
show_details = False

try:
    response = agent_executor.invoke(QUESTION) 
except Exception as e:
    response = str(e)

# printmd(f"**Response:** {response}")

# for step in response['intermediate_steps']:
#     action = step.get('action', '')  # Use .get to handle missing keys
#     observation = step.get('observation', '')
#     print(f"Action: {action}\nObservation: {observation}\n")

if show_details: 

    for step in response['intermediate_steps']:
        if isinstance(step, dict):
            action = step.get('action', '')
            observation = step.get('observation', '')
        elif isinstance(step, tuple):
            action, observation = step 
        else:
            print(f"Unexpected step type: {type(step)}")
            continue  # Skip this step if it's neither dict nor tuple
        print(f"Action: {action}\nObservation: {observation}\n")
else:
    print("No details to show")

# print(response['intermediate_steps'])

2024-07-13 19:50:28,847 - openai._base_client - DEBUG - Request options: {'method': 'post', 'url': '/chat/completions', 'headers': {'api-key': 'af7e7c68b3534ec293717777b66c17d1'}, 'files': None, 'json_data': {'messages': [{'content': '\n\nYou are an agent designed to interact with a SQL database.\n## Instructions:\n- Given an input question, create a syntactically correct mssql query to run, then look at the results of the query and return the answer.\n- Unless the user specifies a specific number of examples they wish to obtain, **ALWAYS** limit your query to at most 30 results.\n- You can order the results by a relevant column to return the most interesting examples in the database.\n- Never query for all the columns from a specific table, only ask for the relevant columns given the question.\n- You have access to tools for interacting with the database.\n- You MUST double check your query before executing it. If you get an error while executing a query, rewrite the query and try aga

2024-07-13 19:50:28,945 - httpcore.connection - DEBUG - connect_tcp.complete return_value=<httpcore._backends.sync.SyncStream object at 0x00000250448A32F0>
2024-07-13 19:50:28,946 - httpcore.connection - DEBUG - start_tls.started ssl_context=<ssl.SSLContext object at 0x00000250449CA450> server_hostname='ai-west-openai.openai.azure.com' timeout=None




[1m> Entering new SQL Agent Executor chain...[0m


2024-07-13 19:50:29,039 - httpcore.connection - DEBUG - start_tls.complete return_value=<httpcore._backends.sync.SyncStream object at 0x0000025043EF2480>
2024-07-13 19:50:29,041 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'POST']>
2024-07-13 19:50:29,042 - httpcore.http11 - DEBUG - send_request_headers.complete
2024-07-13 19:50:29,043 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'POST']>
2024-07-13 19:50:29,044 - httpcore.http11 - DEBUG - send_request_body.complete
2024-07-13 19:50:29,044 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'POST']>
2024-07-13 19:50:29,322 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Transfer-Encoding', b'chunked'), (b'Content-Type', b'text/event-stream; charset=utf-8'), (b'x-ms-region', b'West US 3'), (b'apim-request-id', b'73e8d61e-aa59-4b24-80e9-7bd552f9b6af'), (b'x-ratelimit-remaining-requests', b'149'),

[32;1m[1;3m
Invoking: `sql_db_list_tables` with `{}`


[0m[38;5;200m[1;3mArtists, Conditions, Descriptions, Genres, Mediums, Products, Status, sysdiagrams[0m

2024-07-13 19:50:29,775 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Transfer-Encoding', b'chunked'), (b'Content-Type', b'text/event-stream; charset=utf-8'), (b'x-ms-region', b'West US 3'), (b'apim-request-id', b'c5c8131e-3443-409e-a4f1-9340e710e0d9'), (b'x-ratelimit-remaining-requests', b'148'), (b'x-accel-buffering', b'no'), (b'x-ms-rai-invoked', b'true'), (b'x-request-id', b'a8b1205e-651e-400f-83e3-1dfaeed80030'), (b'Strict-Transport-Security', b'max-age=31536000; includeSubDomains; preload'), (b'azureml-model-session', b'd104-20240617183304'), (b'x-content-type-options', b'nosniff'), (b'x-envoy-upstream-service-time', b'201'), (b'x-ms-client-request-id', b'c5c8131e-3443-409e-a4f1-9340e710e0d9'), (b'x-ratelimit-remaining-tokens', b'144145'), (b'Date', b'Sun, 14 Jul 2024 00:50:29 GMT')])
2024-07-13 19:50:29,778 - httpx - INFO - HTTP Request: POST https://ai-west-openai.openai.azure.com//openai/deployments/gpt4-o/chat/complet

[32;1m[1;3m
Invoking: `sql_db_schema` with `{'table_names': 'Products, Genres'}`


[0m[33;1m[1;3m
CREATE TABLE [Genres] (
	[GenreId] INTEGER NOT NULL IDENTITY(1,1), 
	[GuidId] UNIQUEIDENTIFIER NOT NULL, 
	[Name] NVARCHAR(450) COLLATE SQL_Latin1_General_CP1_CI_AS NOT NULL, 
	[CreateDate] DATETIME2 NOT NULL DEFAULT (getdate()), 
	[IsActive] BIT NOT NULL DEFAULT ((1)), 
	CONSTRAINT [PK_Genres] PRIMARY KEY ([GenreId])
)

/*
3 rows from Genres table:
GenreId	GuidId	Name	CreateDate	IsActive
2	9a5c561f-435c-49a4-a2ae-6cf42c0cb313	Alternative Rock	2023-07-03 20:17:14.103333	True
3	95c9cc23-da7d-457b-9388-9c7847581c9c	Art Rock	2023-07-03 20:17:14.103333	True
4	6c20e80a-113e-489c-bb43-adc79213897b	Blues Rock	2023-07-03 20:17:14.103333	True
*/


CREATE TABLE [Products] (
	[Id] INTEGER NOT NULL IDENTITY(1,1), 
	[ProductId] UNIQUEIDENTIFIER NOT NULL, 
	[GenreId] INTEGER NOT NULL, 
	[ArtistId] INTEGER NOT NULL, 
	[StatusId] INTEGER NOT NULL, 
	[MediumId] INTEGER NOT NULL, 
	[ConditionId] INTEGE

2024-07-13 19:50:30,090 - openai._base_client - DEBUG - Sending HTTP Request: POST https://ai-west-openai.openai.azure.com//openai/deployments/gpt4-o/chat/completions?api-version=2023-12-01-preview
2024-07-13 19:50:30,091 - httpcore.http11 - DEBUG - send_request_headers.started request=<Request [b'POST']>
2024-07-13 19:50:30,093 - httpcore.http11 - DEBUG - send_request_headers.complete
2024-07-13 19:50:30,094 - httpcore.http11 - DEBUG - send_request_body.started request=<Request [b'POST']>
2024-07-13 19:50:30,095 - httpcore.http11 - DEBUG - send_request_body.complete
2024-07-13 19:50:30,096 - httpcore.http11 - DEBUG - receive_response_headers.started request=<Request [b'POST']>
2024-07-13 19:50:30,387 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Transfer-Encoding', b'chunked'), (b'Content-Type', b'text/event-stream; charset=utf-8'), (b'x-ms-region', b'West US 3'), (b'apim-request-id', b'be6b0e59-ae58-4b64-a77c-df026b468ee2'), 

[32;1m[1;3m
Invoking: `sql_db_query_checker` with `{'query': "SELECT Title FROM Products WHERE GenreId IN (SELECT GenreId FROM Genres WHERE Name LIKE '%Rock%') LIMIT 30"}`


[0m

2024-07-13 19:50:31,200 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Content-Length', b'919'), (b'Content-Type', b'application/json'), (b'x-ms-region', b'West US 3'), (b'apim-request-id', b'ce24d097-89ac-4144-8546-f238d21ced3c'), (b'x-ratelimit-remaining-requests', b'146'), (b'x-accel-buffering', b'no'), (b'x-ms-rai-invoked', b'true'), (b'x-request-id', b'6089d418-c95b-48c4-9d8f-72891a6db5ce'), (b'Strict-Transport-Security', b'max-age=31536000; includeSubDomains; preload'), (b'azureml-model-session', b'd104-20240617183304'), (b'x-content-type-options', b'nosniff'), (b'x-envoy-upstream-service-time', b'427'), (b'x-ms-client-request-id', b'ce24d097-89ac-4144-8546-f238d21ced3c'), (b'x-ratelimit-remaining-tokens', b'138345'), (b'Date', b'Sun, 14 Jul 2024 00:50:31 GMT')])
2024-07-13 19:50:31,203 - httpx - INFO - HTTP Request: POST https://ai-west-openai.openai.azure.com//openai/deployments/gpt4-o/chat/completions?api-version=2023-1

[36;1m[1;3m```sql
SELECT Title
FROM Products
WHERE GenreId IN (SELECT GenreId FROM Genres WHERE Name LIKE '%Rock%')
LIMIT 30
```[0m

2024-07-13 19:50:31,580 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Transfer-Encoding', b'chunked'), (b'Content-Type', b'text/event-stream; charset=utf-8'), (b'x-ms-region', b'West US 3'), (b'apim-request-id', b'58d416bf-203f-4896-9bab-2d77e7d0741f'), (b'x-ratelimit-remaining-requests', b'145'), (b'x-accel-buffering', b'no'), (b'x-ms-rai-invoked', b'true'), (b'x-request-id', b'7259c8ae-ceab-4797-a2dc-6354a10d5eeb'), (b'Strict-Transport-Security', b'max-age=31536000; includeSubDomains; preload'), (b'azureml-model-session', b'd104-20240617183304'), (b'x-content-type-options', b'nosniff'), (b'x-envoy-upstream-service-time', b'215'), (b'x-ms-client-request-id', b'58d416bf-203f-4896-9bab-2d77e7d0741f'), (b'x-ratelimit-remaining-tokens', b'134679'), (b'Date', b'Sun, 14 Jul 2024 00:50:31 GMT')])
2024-07-13 19:50:31,582 - httpx - INFO - HTTP Request: POST https://ai-west-openai.openai.azure.com//openai/deployments/gpt4-o/chat/complet

[32;1m[1;3m
Invoking: `sql_db_query` with `{'query': "SELECT TOP 30 Title FROM Products WHERE GenreId IN (SELECT GenreId FROM Genres WHERE Name LIKE '%Rock%')"}`


[0m[36;1m[1;3m[('American Idiot',), ('Blood Sugar Sex Magik',), ('Collective Soul',), ('Hints Allegations and Things Left Unsaid',), ('In Your Honor',), ('Jar of Flies',), ('No Boundaries: A Benefit for the Kosovar Refugees',), ('One by One',), ('One Hot Minute',), ('The Colour and the Shape',), ('There Is Nothing Left to Lose',), ('Duke',), ('A Day at the Races',), ('A Night at the Opera',), ('Breakfast in America',), ('Genesis',), ('Surrealistic Pillow',), ('Viva la Vida or Death and All His Friends',), (' With a Little Help From My Friends',), ('A Space in Time',), ('Book of Dreams',), ('LA Woman',), ('Layla and Other Assorted Love Songs',), ('Let It Bleed',), ('Morrison Hotel',), ('Pearl',), ('Slowhand',), ('Tattoo You',), ('The Blues Brothers: Original Soundtrack Recording',), ('The Captain and Me',)][0m

2024-07-13 19:50:32,213 - httpcore.http11 - DEBUG - receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Transfer-Encoding', b'chunked'), (b'Content-Type', b'text/event-stream; charset=utf-8'), (b'x-ms-region', b'West US 3'), (b'apim-request-id', b'2e77691a-635c-4eb3-8d71-d77aa0562118'), (b'x-ratelimit-remaining-requests', b'144'), (b'x-accel-buffering', b'no'), (b'x-ms-rai-invoked', b'true'), (b'x-request-id', b'1498a84f-426d-45fd-9bb7-5a81f523d335'), (b'Strict-Transport-Security', b'max-age=31536000; includeSubDomains; preload'), (b'azureml-model-session', b'd104-20240617183304'), (b'x-content-type-options', b'nosniff'), (b'x-envoy-upstream-service-time', b'134'), (b'x-ms-client-request-id', b'2e77691a-635c-4eb3-8d71-d77aa0562118'), (b'x-ratelimit-remaining-tokens', b'130812'), (b'Date', b'Sun, 14 Jul 2024 00:50:32 GMT')])
2024-07-13 19:50:32,214 - httpx - INFO - HTTP Request: POST https://ai-west-openai.openai.azure.com//openai/deployments/gpt4-o/chat/complet

[32;1m[1;3mFinal Answer: Here are some of the rock products:

1. American Idiot
2. Blood Sugar Sex Magik
3. Collective Soul
4. Hints Allegations and Things Left Unsaid
5. In Your Honor
6. Jar of Flies
7. No Boundaries: A Benefit for the Kosovar Refugees
8. One by One
9. One Hot Minute
10. The Colour and the Shape
11. There Is Nothing Left to Lose
12. Duke
13. A Day at the Races
14. A Night at the Opera
15. Breakfast in America
16. Genesis
17. Surrealistic Pillow
18. Viva la Vida or Death and All His Friends
19. With a Little Help From My Friends
20. A Space in Time
21. Book of Dreams
22. LA Woman
23. Layla and Other Assorted Love Songs
24. Let It Bleed
25. Morrison Hotel
26. Pearl
27. Slowhand
28. Tattoo You
29. The Blues Brothers: Original Soundtrack Recording
30. The Captain and Me

Explanation:
I queried the `Products` table for titles of products where the `GenreId` corresponds to any genre containing the word "Rock" in the `Genres` table. I limited the results to the top 30 entr

In [40]:
print (response["output"])

Final Answer: Here are some of the rock products:

1. American Idiot
2. Blood Sugar Sex Magik
3. Collective Soul
4. Hints Allegations and Things Left Unsaid
5. In Your Honor
6. Jar of Flies
7. No Boundaries: A Benefit for the Kosovar Refugees
8. One by One
9. One Hot Minute
10. The Colour and the Shape
11. There Is Nothing Left to Lose
12. Duke
13. A Day at the Races
14. A Night at the Opera
15. Breakfast in America
16. Genesis
17. Surrealistic Pillow
18. Viva la Vida or Death and All His Friends
19. With a Little Help From My Friends
20. A Space in Time
21. Book of Dreams
22. LA Woman
23. Layla and Other Assorted Love Songs
24. Let It Bleed
25. Morrison Hotel
26. Pearl
27. Slowhand
28. Tattoo You
29. The Blues Brothers: Original Soundtrack Recording
30. The Captain and Me

Explanation:
I queried the `Products` table for titles of products where the `GenreId` corresponds to any genre containing the word "Rock" in the `Genres` table. I limited the results to the top 30 entries. The SQL 