In [1]:
# !pip install torch transformers accelerate bitsandbytes scipy 

In [2]:
!nvidia-smi

Mon Sep 18 18:39:14 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.65.01    Driver Version: 515.65.01    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  On   | 00000000:01:00.0 Off |                  N/A |
|  0%   54C    P8    26W / 350W |   1773MiB / 24576MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from typing import List
import sqlite3
import transformers
print(transformers.__version__)
print(torch.cuda.is_available())

4.32.1
True


# Load the model

Set parameter load_in_4bit=True for a model size of 12GB, otherwise the model will be too big. 
If you work on A100 machine, parameter torch_dtype=torch.bfloat16 can be set to True instead.

Can read more here: https://huggingface.co/blog/4bit-transformers-bitsandbytes

In [4]:
sql_model_name = "defog/sqlcoder"
sql_tokenizer = AutoTokenizer.from_pretrained(sql_model_name, cache_dir='/mnt/4TBSSD/huawei2023cache/')
sql_model = AutoModelForCausalLM.from_pretrained(
    sql_model_name,
    trust_remote_code=True,
    load_in_4bit=True,
    device_map="auto",
    use_cache=True,
    cache_dir='/mnt/4TBSSD/huawei2023cache/'
)

answer_model_name = "meta-llama/Llama-2-7b-chat-hf"
token = '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'

answer_tokenizer = AutoTokenizer.from_pretrained(answer_model_name, token=token, cache_dir='/mnt/4TBSSD/huawei2023cache/')
answer_model = transformers.pipeline(
    "text-generation",
    model='openai-gpt',
    # torch_dtype=torch.bfloat16,
    # device_map="auto",
    # token=token
)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [19]:
def generate_sql_query(model, tokenizer, question: str, db_schema: str, tables_hints: List[str]) -> str:
    # Implement logic to generate an SQL query based on the question and table hints.
    # Replace the "pass" with a calling function to LLM
    
    # Handle the case when table hints are empty or invalid.
    if not tables_hints:
        # Default behavior: Query all tables
        pass
    
    # Handle the general case
    # Example: "SELECT COUNT(*) FROM customers"
    prompt = generate_sql_query_generation_prompt(question, db_schema)
    eos_token_id = tokenizer.convert_tokens_to_ids(["```"])[0]
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda") # or to("cpu")
    generated_ids = model.generate(
        **inputs,
        num_return_sequences=1,
        eos_token_id=eos_token_id,
        pad_token_id=eos_token_id,
        max_new_tokens=400,
        do_sample=False,
        num_beams=1
    )
    
    outputs = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    sql_query = outputs[0].split("```sql")[-1].split("```")[0].split(";")[0].strip() + ";"
    torch.cuda.empty_cache()
    torch.cuda.synchronize()
    
    return sql_query

In [23]:
def generate_answer_with_context(model, tokenizer, question: str, schema: List[str]) -> str:
    answer_generation_prompt = generate_answer_generation_prompt(question, schema)
    
    ### Use the same procedure
    # eos_token_id = tokenizer.convert_tokens_to_ids(["```"])[0]
    # inputs = tokenizer(answer_generation_prompt, return_tensors="pt").to("cuda") # or to("cpu")

    answer = model(
        answer_generation_prompt,
        do_sample=False,
        # top_k=10,
        num_return_sequences=1,
        # eos_token_id=tokenizer.eos_token_id,
        # max_length=200,
    )

    
    # for seq in sequences:
    #     print(f"Result: {seq['generated_text']}")
    
    # generated_ids = model.generate(
    #     **inputs,
    #     num_return_sequences=1,
    #     eos_token_id=eos_token_id,
    #     pad_token_id=eos_token_id,
    #     max_new_tokens=400,
    #     do_sample=False,
    #     num_beams=1
    # )
    
    # answer = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    torch.cuda.empty_cache()
    torch.cuda.synchronize()
    
    return answer

# Prompt Engineering

In [7]:
SQL_QUERY_PROMPT_TEMPLATE = """### Instructions:
Your task is to convert a question into a SQL query, given a SQLlite database schema.
Adhere to these rules:
- **Deliberately go through the question and database schema word by word** to appropriately answer the question
- **Use Table Aliases** to prevent ambiguity. For example, `SELECT table1.col1, table2.col1 FROM table1 JOIN table2 ON table1.id = table2.id`.
- When creating a ratio, always cast the numerator as float
### Input:
Generate a SQL query that answers the question `{question}`.
This query will run on a database whose schema is represented in this string:
{db_schema}

### Response:
Based on your instructions, here is the SQL query I have generated to answer the question `{question}`:
```sql
"""

ANSWER_GENERATION_PROMPT_TEMPLATE = """
The data we have: `{data}`
The question: `{question}`
The answer: 
"""

In [8]:
def generate_sql_query_generation_prompt(question, db_schema):
    return SQL_QUERY_PROMPT_TEMPLATE.format(question=question, db_schema=db_schema)

def generate_answer_generation_prompt(question, data):
    return ANSWER_GENERATION_PROMPT_TEMPLATE.format(question=question, data=str(data))

In [9]:
def format_sql_execution(sql_response, sql_schema):
    results = []
    for row in sql_response:
        results.append({
            k:v for k,v in zip(sql_schema, row)
        })
    return results

# Database

In [10]:
def get_schemas(cursor, table_hints=None):
    '''
    get the schema information from this database
    '''
    tableQuery="SELECT name FROM sqlite_master WHERE type='table'"
    tableList=cursor.execute(tableQuery).fetchall()
    schemas = []
    tables = {}
    for table in tableList:
        tableName=table[0]
        columnQuery="PRAGMA table_info('%s')" % tableName
        schema=cursor.execute(columnQuery).fetchall()
        tables[tableName] = schema
    return tables

# Main functions

In [11]:
def connect_fun(database_name: str) -> sqlite3.Connection:
    """
    Connect to an SQLite database and return a connection object.

    Parameters:
        database_name (str): The name (or path) of the SQLite database file to connect to.

    Returns:
        sqlite3.Connection or None: A connection object if the connection is successful,
        or None if there is an error.

    Example usage:
        db_name = 'your_database_name.db'
        connection = connect_fun(db_name)
        
        if connection:
            print(f"Connected to {db_name}")
            # You can now use 'connection' to interact with the database.
        else:
            print("Connection failed.")
    """
    try:
        connection = sqlite3.connect(database_name)
        return connection
    except sqlite3.Error as e:
        print(f"Error connecting to the database: {e}")
        return None


def query_fun(question: str, conn: sqlite3.Connection, tables_hints: List[str]=None, debug:bool=False) -> str:
    """
    Generate an answer to a question based on an SQLite database and question context.

    Parameters:
        question (str): The user's question.
        tables_hints (List[str]): List of table names to consider in the query.
        conn (sqlite3.Connection): A connection to the SQLite database.

    Returns:
        str: The answer to the question.

    Example usage:
        question = "How many customers are there in the database?"
        table_hints = ["customers"]
        connection = sqlite3.connect("your_database.db")
        answer = query_fun(question, table_hints, connection)
        print(answer)
    """
    try:
        cursor = conn.cursor()

        # Step 0: Get related tables based on all schemas and table hints
        related_schemas = get_schemas(cursor, tables_hints) 

        if debug:
            print("Related schemas: ", related_schemas) 
        
        # Step 1: Generate an SQL query based on the question and table hints.
        sql_query = generate_sql_query(sql_model, sql_tokenizer, question, related_schemas, tables_hints)

        if debug:
            print("SQL query: ", sql_query)

        # Step 2: Execute the SQL query and fetch the results.
        response = cursor.execute(sql_query)

        # Step 3: Obtain records from response and schema information (column names) from the cursor description.
        records = response.fetchall()
        reponse_schema = [desc[0] for desc in cursor.description]
        sql_response = format_sql_execution(records, reponse_schema)

        if debug:
            print("SQL execution response: ", sql_response)

        # Step 4: Process the query result and generate an answer with context using LLM.
        answer = generate_answer_with_context(answer_model, answer_tokenizer, question, records)

        return sql_query, answer

    except sqlite3.Error as e:
        print(f"SQLite Error: {e}")
        return "An error occurred while processing the query."
    except Exception as e:
        print(f"Error: {e}")
        return "An error occurred."

# Experiments

![alt](/mnt/4TBSSD/pmkhoi/huawei-sql/huawei-arena-2023/imgs/chinook-er-diagram.png)

In [13]:
connection = connect_fun('/mnt/4TBSSD/pmkhoi/huawei-sql/huawei-arena-2023/sample_db/Chinook_Sqlite.sqlite')

In [14]:
cursor = connection.cursor()
related_schemas = get_schemas(cursor, table_hints=None)

In [15]:
related_schemas

{'Album': [(0, 'AlbumId', 'INTEGER', 1, None, 1),
  (1, 'Title', 'NVARCHAR(160)', 1, None, 0),
  (2, 'ArtistId', 'INTEGER', 1, None, 0)],
 'Artist': [(0, 'ArtistId', 'INTEGER', 1, None, 1),
  (1, 'Name', 'NVARCHAR(120)', 0, None, 0)],
 'Customer': [(0, 'CustomerId', 'INTEGER', 1, None, 1),
  (1, 'FirstName', 'NVARCHAR(40)', 1, None, 0),
  (2, 'LastName', 'NVARCHAR(20)', 1, None, 0),
  (3, 'Company', 'NVARCHAR(80)', 0, None, 0),
  (4, 'Address', 'NVARCHAR(70)', 0, None, 0),
  (5, 'City', 'NVARCHAR(40)', 0, None, 0),
  (6, 'State', 'NVARCHAR(40)', 0, None, 0),
  (7, 'Country', 'NVARCHAR(40)', 0, None, 0),
  (8, 'PostalCode', 'NVARCHAR(10)', 0, None, 0),
  (9, 'Phone', 'NVARCHAR(24)', 0, None, 0),
  (10, 'Fax', 'NVARCHAR(24)', 0, None, 0),
  (11, 'Email', 'NVARCHAR(60)', 1, None, 0),
  (12, 'SupportRepId', 'INTEGER', 0, None, 0)],
 'Employee': [(0, 'EmployeeId', 'INTEGER', 1, None, 1),
  (1, 'LastName', 'NVARCHAR(20)', 1, None, 0),
  (2, 'FirstName', 'NVARCHAR(20)', 1, None, 0),
  (3, 'Ti

In [16]:
questions = [
    # "What is the highest sales of three salesman person? Give me the salesperson's name and his or her total sales",
    # "In 1981 which team picked overall 148?"

    "Get me the name of all the tracks"
]

In [24]:
%%time
sql_command, text_response = query_fun(
    question=questions[0],
    conn=connection
)

Error: The size of tensor a (35950) must match the size of tensor b (512) at non-singleton dimension 1




ValueError: too many values to unpack (expected 2)

In [22]:
sql_command

NameError: name 'sql_command' is not defined

In [None]:
text_response