> **This notebook is about using langchain tools to get the db table schema of a SQL database**
> - The goal is to NOT use agents, but the langchain tool is useful.
> - Reference: https://python.langchain.com/docs/expression_language/cookbook/sql_db

## Initialize Environment

In [4]:
import os
from dotenv import load_dotenv

# Set environment variables in the .env file.
load_dotenv()

COMPLETION_MODEL = os.environ["OPENAI_COMPLETION_MODEL"]
COMPLETION_DEPLOYMENT = os.environ["OPENAI_COMPLETION_DEPLOYMENT"]
CHAT_MODEL = os.environ["OPENAI_CHAT_MODEL"]
CHAT_DEPLOYMENT = os.environ["OPENAI_CHAT_DEPLOYMENT"]
OPENAI_API_VERSION = os.environ["OPENAI_API_VERSION"]

SQL_DB_USER = os.environ["SQL_DB_USER"]
SQL_DB_PASSWORD = os.environ["SQL_DB_PASSWORD"]
SQL_DB_SERVER_NAME = os.environ["SQL_DB_SERVER_NAME"]
SQL_DB_NAME = os.environ["SQL_DB_NAME"]
SQL_CONNECTIONSTRING_FORMAT = os.environ["SQL_CONNECTIONSTRING_FORMAT"]

In [5]:
from langchain.llms import AzureOpenAI
from langchain.chat_models import AzureChatOpenAI

llm = AzureOpenAI(
    model_name=COMPLETION_MODEL,
    deployment_name=COMPLETION_DEPLOYMENT,
    #temperature=0.3,
    verbose=True
)

chat = AzureChatOpenAI(
    deployment_name=CHAT_DEPLOYMENT,
    openai_api_version=OPENAI_API_VERSION,
    #temperature=0.3,
    verbose=True
)

## Connect to SQL Database

In [6]:
from langchain.utilities import SQLDatabase

connection_string = SQL_CONNECTIONSTRING_FORMAT.format(
    database_user=SQL_DB_USER,
    database_password=SQL_DB_PASSWORD,
    database_server=SQL_DB_SERVER_NAME,
    database_db=SQL_DB_NAME)

db = SQLDatabase.from_uri(connection_string)

In [7]:
# Test Get Schema
db.get_table_info()

"\nCREATE TABLE [Customer] (\n\t[Id] INTEGER NOT NULL IDENTITY(1,1), \n\t[FirstName] NVARCHAR(40) COLLATE SQL_Latin1_General_CP1_CI_AS NOT NULL, \n\t[LastName] NVARCHAR(40) COLLATE SQL_Latin1_General_CP1_CI_AS NOT NULL, \n\t[City] NVARCHAR(40) COLLATE SQL_Latin1_General_CP1_CI_AS NULL, \n\t[Country] NVARCHAR(40) COLLATE SQL_Latin1_General_CP1_CI_AS NULL, \n\t[Phone] NVARCHAR(20) COLLATE SQL_Latin1_General_CP1_CI_AS NULL, \n\tCONSTRAINT [PK_CUSTOMER] PRIMARY KEY ([Id])\n)\n\n/*\n3 rows from Customer table:\nId\tFirstName\tLastName\tCity\tCountry\tPhone\n1\tMaria\tAnders\tBerlin\tGermany\t030-0074321\n2\tAna\tTrujillo\tMéxico D.F.\tMexico\t(5) 555-4729\n3\tAntonio\tMoreno\tMéxico D.F.\tMexico\t(5) 555-3932\n*/\n\n\nCREATE TABLE [OrderItem] (\n\t[Id] INTEGER NOT NULL IDENTITY(1,1), \n\t[OrderId] INTEGER NOT NULL, \n\t[ProductId] INTEGER NOT NULL, \n\t[UnitPrice] FLOAT(53) NULL DEFAULT ((0)), \n\t[Quantity] INTEGER NOT NULL DEFAULT ((1)), \n\tCONSTRAINT [PK_ORDERITEM] PRIMARY KEY ([Id]), \

## Query DB using LLM

In [8]:
def get_schema(_):
    return db.get_table_info()

def run_query(query):
    return db.run(query)

systemprompt = """Act as an expert SQL database engineer.

You will be provided with the SQL used to create the schema of an existing database.
Your goal is to create valid, well-formed SQL that will be executed to fulfill the request.
Use explicit table aliases where possible and do not duplicate column names across joins, make them descriptive and unique.
Only reference the database schema, referencing anything else that wasn't defined will cause an error.

Schema:
```
{schema}
```

Question: {question}

The SQL should be valid, well-formed, and execute without error.
Your response will be executed as is so only output SQL that is immediately runnable.
Analyze from the perspective of other experts in your field and work it out in a step by step way to be sure you have the right answer.
Do not format or add commentary to your response. No prose."""

In [9]:
get_schema(_)

"\nCREATE TABLE [Customer] (\n\t[Id] INTEGER NOT NULL IDENTITY(1,1), \n\t[FirstName] NVARCHAR(40) COLLATE SQL_Latin1_General_CP1_CI_AS NOT NULL, \n\t[LastName] NVARCHAR(40) COLLATE SQL_Latin1_General_CP1_CI_AS NOT NULL, \n\t[City] NVARCHAR(40) COLLATE SQL_Latin1_General_CP1_CI_AS NULL, \n\t[Country] NVARCHAR(40) COLLATE SQL_Latin1_General_CP1_CI_AS NULL, \n\t[Phone] NVARCHAR(20) COLLATE SQL_Latin1_General_CP1_CI_AS NULL, \n\tCONSTRAINT [PK_CUSTOMER] PRIMARY KEY ([Id])\n)\n\n/*\n3 rows from Customer table:\nId\tFirstName\tLastName\tCity\tCountry\tPhone\n1\tMaria\tAnders\tBerlin\tGermany\t030-0074321\n2\tAna\tTrujillo\tMéxico D.F.\tMexico\t(5) 555-4729\n3\tAntonio\tMoreno\tMéxico D.F.\tMexico\t(5) 555-3932\n*/\n\n\nCREATE TABLE [OrderItem] (\n\t[Id] INTEGER NOT NULL IDENTITY(1,1), \n\t[OrderId] INTEGER NOT NULL, \n\t[ProductId] INTEGER NOT NULL, \n\t[UnitPrice] FLOAT(53) NULL DEFAULT ((0)), \n\t[Quantity] INTEGER NOT NULL DEFAULT ((1)), \n\tCONSTRAINT [PK_ORDERITEM] PRIMARY KEY ([Id]), \

In [10]:
from operator import itemgetter
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableLambda, RunnableMap

RunnableLambda(get_schema)


RunnableLambda(...)

In [11]:
inputs = {
    "schema": RunnableLambda(get_schema),
    "question": itemgetter("question")
}

sql_response = (
        RunnableMap(inputs)
        | systemprompt
        | chat.bind(stop=["\nSQLResult:"])
        | StrOutputParser()
    )

TypeError: Expected a Runnable, callable or dict.Instead got an unsupported type: <class 'str'>

In [None]:
sql_response.invoke({"question": "How many employees are there?"})

In [None]:
template = """Based on the table schema below, question, sql query, and sql response, write a natural language response:
{schema}

Question: {question}
SQL Query: {query}
SQL Response: {response}"""
prompt_response = ChatPromptTemplate.from_template(template)

In [None]:
full_chain = (
    RunnableMap({
        "question": itemgetter("question"),
        "query": sql_response,
    }) 
    | {
        "schema": RunnableLambda(get_schema),
        "question": itemgetter("question"),
        "query": itemgetter("query"),
        "response": lambda x: db.run(x["query"])    
    } 
    | prompt_response 
    | chat
)

In [None]:
full_chain.invoke({"question": "How many employees are there?"})