# SQL Agent with Python Module

In [1]:
import os
import re
import json
import duckdb
import numpy as np
import pandas as pd
from openai import OpenAI
from pydantic import BaseModel, Field
from dotenv import load_dotenv

load_dotenv("../../.env")

True

## Initialize the OpenAI Client

In [2]:
# Build an OpenAI-compatible client; the key and endpoint come from the
# environment variables loaded from the .env file.
client = OpenAI(
    base_url=os.getenv("TYPHOON_BASE_URL"),
    api_key=os.getenv("TYPHOON_API_KEY"),
)

# Sanity check: send a single user turn and show the model's reply.
response = client.chat.completions.create(
    messages=[dict(role="user", content="Who are you?")],
    model=os.getenv("TYPHOON_MODEL"),
)

print(response.choices[0].message.content)

Hello! I'm Typhoon, a helpful assistant created by SCB 10X. My purpose is to be helpful, harmless, and honest in my interactions. I'm here to assist you with your requests and provide information to the best of my ability. 😊



## Define a list of callable tools for the model

In [9]:
# Tool schema advertised to the model. The tool name and the parameter name
# must match the Python function `text2sql(question)`, because the tool-call
# arguments are unpacked straight into it.
function_definition = [{
    "type": "function",
    "function": {
        "name": "text2sql",
        "description": "Get SQL based on user's question",
        "parameters": {
            "type": "object",
            "properties": {
                "question": {
                    "type": "string",
                    "description": "user's question",
                }
            },
            # Without this the model is free to omit `question`, and the
            # `text2sql(**arguments)` call would then fail with a TypeError.
            "required": ["question"],
        }
    }
}]

In [4]:
# Import table schema.
# The context manager closes the file handle as soon as the JSON is parsed
# (the bare open() used previously left it open for the life of the kernel).
with open("../output/chinook_schema.json", "r", encoding="utf-8") as file:
    schema_dict = json.load(file)

# Extract table schema to string format: one schema entry per line.
schema_string = "\n".join(schema_dict.values())

# Write system prompt to instruct LLM for SQL generation task.
# NOTE(review): every line ending in a backslash is joined to the next one
# inside the string, so the "Rules" bullets reach the model as a single line;
# the text also contains the typo "tiple". The prompt is left untouched here
# because rewording it changes what the model generates -- confirm and fix
# deliberately.
system_prompt = f"""
You are expert in query generator. \
Your task is to write correct and optimized queries based on the database schema and user questions. \
    Use database schema delimited by tiple single quotes to consider the tables, columns and SQL generation.\
'''{schema_string}'''

Rules:
- Always use the column and table names exactly as defined in schema.\
- Always return only SQL code (no explanation or markdown unless requested)\
- If multiple tables are required, use proper JOINs based on foreign key relationships.\
- If aggregation is needed, include GROUP BY as required.\
- If user input is ambiguous, write your best interpretation and comment it.\
- If the user provides question with term 'Sales Agent', keep in mind that Sales Agents are employees with Title 'Sales Support Agents'\
"""

def _extract_sql(reply):
    """Strip markdown code fencing from an LLM reply and return the bare SQL."""
    text = reply or ""  # the API may return None as message content

    # Drop a language tag only when it is attached to a code fence
    # ("```sql"); "sql" appearing anywhere else in the query is real content.
    text = re.sub(r"```[ \t]*sql\b[ \t]*", "```", text, flags=re.IGNORECASE)

    # If the reply contains a complete fenced block, keep just its content.
    fenced = re.search(r"```(.*?)```", text, flags=re.DOTALL)
    if fenced:
        text = fenced.group(1)

    # Remove any remaining backticks (unclosed fences, inline quoting).
    return text.replace("`", "").strip()


def text2sql(question, model="typhoon-v2.5-30b-a3b-instruct"):
    """Ask the LLM to translate a natural-language question into SQL.

    Parameters
    ----------
    question : str
        The user's question about the database.
    model : str, optional
        Name of the chat model to query; defaults to the model this notebook
        was written against.

    Returns
    -------
    str
        The generated SQL with markdown code fencing and backticks removed.
    """
    client = OpenAI(
        api_key = os.getenv("TYPHOON_API_KEY"),
        base_url = os.getenv("TYPHOON_BASE_URL")
    )

    response = client.chat.completions.create(
        model = model,
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": question}
        ],
        max_tokens = 1024
    )

    return _extract_sql(response.choices[0].message.content)

def execute_sql(sql):
    """Run a SQL query against the Chinook DuckDB file and return the result as text.

    Parameters
    ----------
    sql : str
        The query to execute. In this notebook it is produced by an LLM, so it
        is treated as untrusted.

    Returns
    -------
    str
        The result set rendered with ``DataFrame.to_string()``.
    """
    # Read-only: the query text is model-generated, so it must not be able to
    # modify or drop data in the database file.
    conn = duckdb.connect("../output/chinook.duckdb", read_only=True)
    try:
        df = conn.sql(sql).to_df()
    finally:
        # Release the connection even when the query is invalid and raises.
        conn.close()

    return df.to_string()


# End-to-end check of the two helpers: question -> SQL -> result table.
question = "Which country has the most sales?"

query = text2sql(question)      # natural language -> SQL text
result = execute_sql(query)     # SQL text -> rendered result table

# Show the question, the generated query and the table in one go.
print(
    f"Question: {question}\n\nQuery:\n{query}\n",
    f"Table:\n{result}",
    sep="\n",
)

Question: Which country has the most sales?

Query:
SELECT BillingCountry, SUM(Total) AS TotalSales
FROM invoices
GROUP BY BillingCountry
ORDER BY TotalSales DESC
LIMIT 1;

Table:
  BillingCountry  TotalSales
0            USA      523.06


In [19]:
# Conversation history sent to the model; tool results are appended to it below.
messages = [
    {"role": "system", "content": "You are helpful assistant."},
    {"role": "user", "content": "Which sales agent made the most sales?"}
]

# First pass: let the model decide whether it needs the text2sql tool.
response = client.chat.completions.create(
    model = os.getenv("TYPHOON_MODEL"),
    messages = messages,
    tools = function_definition
)

response = response.choices[0].message

if response.tool_calls:
    # The assistant turn that requested the tools must be in the history
    # before the tool results, otherwise the results have nothing to refer to.
    messages.append({
        "role": "assistant",
        "content": response.content or "",
        "tool_calls": [
            {
                "id": tool_call.id,
                "type": "function",
                "function": {
                    "name": tool_call.function.name,
                    "arguments": tool_call.function.arguments,
                },
            }
            for tool_call in response.tool_calls
        ],
    })

    for tool_call in response.tool_calls:
        name = tool_call.function.name
        # An empty argument string is treated as "no arguments".
        arguments = json.loads(tool_call.function.arguments or "{}")

        if name == "text2sql":
            sql = text2sql(**arguments)
            result = execute_sql(sql)
            payload = {"name": name, "arguments": arguments, "results": result}
        else:
            # Every requested tool call gets an answer, even an unknown one.
            payload = {"name": name, "arguments": arguments, "error": f"Unknown tool: {name}"}

        messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,  # ties the result to its request
            "content": json.dumps(payload),
        })

    # Second pass: the model turns the tool output into a final answer.
    response = client.chat.completions.create(
        model = os.getenv("TYPHOON_MODEL"),
        messages = messages
    )

    print(response.choices[0].message.content)
else:
    # The model answered directly without calling a tool; show that answer.
    print(response.content)

Jane Peacock made the most sales with a total of 146 invoices.
