# PRED-SQL EMBEDDING SELECTION EXPERIMENT

In [1]:
import os
import json
import nest_asyncio
from tqdm import tqdm
from utils.prompts.prompt_builder import prompt_factory
from utils.data.data_builder import load_data
from utils.llm.ask_llm import run_llm
from third_party.spider_eval.evaluation import evaluate_spider
from utils.data.post_process import save_results
from utils.prompts.prompt_builder import get_openai_key

# Name of the embedding model handed to prompt_factory (embedding_model=...) and
# recorded in the saved run arguments of every experiment in this notebook.
# NOTE(review): presumably a Hugging Face model id - confirm.
EMBEDDING = 's2593817/sft-sql-embedding'

In [2]:
from openai import OpenAI

# OpenAI() is constructed without explicit credentials below, so the key returned
# by get_openai_key() (configured in utils/parameters.py) is exported to the
# environment first.
os.environ.update({'OPENAI_API_KEY': get_openai_key()})

client = OpenAI()

In [3]:
# Root folder passed to load_data as the location of the benchmark datasets.
path_data = "benchmarks"

# Load the Spider dataset together with a file of preliminary predictions.
# NOTE(review): presumably these DAIL-SQL + GPT-4 predictions are what the
# EMBEDPRED selector embeds when choosing examples - confirm in data_builder.
data = load_data(
    "spider",
    path_data,
    pre_test_result='data/pre-predictions/dail-sql+GPT-4.txt',
)

## Pred-SQL Fine-Tuned Embedding Selector Spider 1-Shot Experiment

In [4]:
# prompt_factory returns a callable: configure it for the 1-shot EMBEDPRED
# setting first, then instantiate it on the loaded data.
prompt_builder = prompt_factory(
    k_shot=1,
    repr_type="SQL",
    example_format="QA",
    selector_type="EMBEDPRED",
    embedding_model=EMBEDDING,
)
prompt = prompt_builder(data=data, tokenizer="gpt-3.5-turbo")

In [5]:
# Build one 1-Shot GPT-3.5-Turbo Text-to-SQL prompt per Spider test question.
# The index handed to prompt.format is 1-based; tqdm shows progress.
questions = [
    prompt.format(index=position,
                  target=test_question,
                  max_seq_len=2048,
                  max_ans_len=200,
                  scope_factor=1,
                  cross_domain=False)
    for position, test_question in enumerate(tqdm(data.get_test_json()), start=1)
]

100%|██████████| 1034/1034 [05:46<00:00,  2.99it/s]


In [6]:
# Parameters describing the Pred-SQL embedding selector 1-shot run; they are
# stored alongside the formatted questions in the prompts file.
args = {
    "data_type": "spider",
    "split": "test",
    "tokenizer": "gpt-3.5-turbo",
    "max_seq_length": 2048,
    "prompt_repr": "SQL",
    "k-shot": 1,
    "example_type": "QA",
    "selector_type": "EMBEDPRED",
    "embedding_model": EMBEDDING
}

# Task object written to disk: the run parameters plus every formatted question.
task = {
    "args": args,
    "questions": questions
}

OUT_DIR = os.path.join("chapter-4", "results", "pred-sql-embedding-sft-experiments", "1-shot")
os.makedirs(OUT_DIR, exist_ok=True)

PROMPTS_FILE = os.path.join(OUT_DIR, "pred-sql-embedding-sft-prompts-1.json")

# Write through a context manager so the file is flushed and closed before any
# later cell reads it; a bare open() passed to json.dump is never closed
# explicitly.
with open(PROMPTS_FILE, "w") as prompts_fh:
    json.dump(task, prompts_fh, indent=4)

In [7]:
# Recompute the 1-shot paths so this cell can be run on its own, then hand the
# prompts file, the responses path and the model name to run_llm.
OUT_DIR = os.path.join("chapter-4", "results", "pred-sql-embedding-sft-experiments", "1-shot")
PROMPTS_FILE, RESPONSES_FILE = (
    os.path.join(OUT_DIR, file_name)
    for file_name in ("pred-sql-embedding-sft-prompts-1.json",
                      "pred-sql-embedding-sft-responses-1.txt")
)

run_llm(PROMPTS_FILE, RESPONSES_FILE, model="gpt-3.5-turbo")

100%|██████████| 1034/1034 [21:20<00:00,  1.24s/it]  


In [6]:
# Patch asyncio so nested event loops are allowed.
# NOTE(review): presumably required because evaluate_spider uses asyncio while
# the notebook's own loop is already running - confirm.
nest_asyncio.apply()

# Recompute the 1-shot paths so this cell can be run on its own.
OUT_DIR = os.path.join("chapter-4", "results", "pred-sql-embedding-sft-experiments", "1-shot")
PROMPTS_FILE, RESPONSES_FILE, RESULTS_FILE = (
    os.path.join(OUT_DIR, file_name)
    for file_name in ("pred-sql-embedding-sft-prompts-1.json",
                      "pred-sql-embedding-sft-responses-1.txt",
                      "pred-sql-embedding-sft-results-1.json")
)

# Evaluate the saved responses against the Spider dev gold queries.
results = evaluate_spider(
    gold="benchmarks/spider/dev_gold.sql",
    pred=RESPONSES_FILE,
    db="benchmarks/spider/databases",
    table="benchmarks/spider/tables.json",
)

save_results(PROMPTS_FILE, RESPONSES_FILE, RESULTS_FILE, results)

100%|██████████| 1/1 [00:01<00:00,  1.60s/it]
100%|██████████| 1/1 [00:00<00:00, 124.86it/s]
100%|██████████| 1/1 [00:00<00:00, 71.23it/s]
100%|██████████| 1/1 [00:00<00:00, 118.70it/s]
100%|██████████| 1/1 [00:00<00:00, 106.89it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
  0%|          | 0/1 [00:00<?, ?it/s]
  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 131.08it/s]
100%|██████████| 1/1 [00:00<00:00, 124.49it/s]
100%|██████████| 1/1 [00:00<00:00, 75.37it/s]
100%|██████████| 1/1 [00:00<00:00, 199.46it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 113.22it/s]
  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 318.93it/s]
100%|██████████| 1/1 [00:00<00:00, 501.65it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 

                     easy                 medium               hard                 extra                all                 
count                248                  446                  174                  166                  1034                
execution            0.895                0.841                0.649                0.542                0.774               


## Pred-SQL Fine-Tuned Embedding Selector Spider 3-Shot Experiment

In [9]:
# prompt_factory returns a callable: configure it for the 3-shot EMBEDPRED
# setting first, then instantiate it on the loaded data.
prompt_builder = prompt_factory(
    k_shot=3,
    repr_type="SQL",
    example_format="QA",
    selector_type="EMBEDPRED",
    embedding_model=EMBEDDING,
)
prompt = prompt_builder(data=data, tokenizer="gpt-3.5-turbo")

In [10]:
# Build one 3-Shot GPT-3.5-Turbo Text-to-SQL prompt per Spider test question.
# The index handed to prompt.format is 1-based; tqdm shows progress.
questions = [
    prompt.format(index=position,
                  target=test_question,
                  max_seq_len=2048,
                  max_ans_len=200,
                  scope_factor=1,
                  cross_domain=False)
    for position, test_question in enumerate(tqdm(data.get_test_json()), start=1)
]

100%|██████████| 1034/1034 [05:59<00:00,  2.87it/s]


In [11]:
# Parameters describing the Pred-SQL embedding selector 3-shot run; they are
# stored alongside the formatted questions in the prompts file.
args = {
    "data_type": "spider",
    "split": "test",
    "tokenizer": "gpt-3.5-turbo",
    "max_seq_length": 2048,
    "prompt_repr": "SQL",
    "k-shot": 3,
    "example_type": "QA",
    "selector_type": "EMBEDPRED",
    "embedding_model": EMBEDDING
}

# Task object written to disk: the run parameters plus every formatted question.
task = {
    "args": args,
    "questions": questions
}

OUT_DIR = os.path.join("chapter-4", "results", "pred-sql-embedding-sft-experiments", "3-shot")
os.makedirs(OUT_DIR, exist_ok=True)

PROMPTS_FILE = os.path.join(OUT_DIR, "pred-sql-embedding-sft-prompts-3.json")

# Write through a context manager so the file is flushed and closed before any
# later cell reads it; a bare open() passed to json.dump is never closed
# explicitly.
with open(PROMPTS_FILE, "w") as prompts_fh:
    json.dump(task, prompts_fh, indent=4)

In [12]:
# Recompute the 3-shot paths so this cell can be run on its own, then hand the
# prompts file, the responses path and the model name to run_llm.
OUT_DIR = os.path.join("chapter-4", "results", "pred-sql-embedding-sft-experiments", "3-shot")
PROMPTS_FILE, RESPONSES_FILE = (
    os.path.join(OUT_DIR, file_name)
    for file_name in ("pred-sql-embedding-sft-prompts-3.json",
                      "pred-sql-embedding-sft-responses-3.txt")
)

run_llm(PROMPTS_FILE, RESPONSES_FILE, model="gpt-3.5-turbo")

100%|██████████| 1034/1034 [14:16<00:00,  1.21it/s]


In [13]:
# Patch asyncio so nested event loops are allowed.
# NOTE(review): presumably required because evaluate_spider uses asyncio while
# the notebook's own loop is already running - confirm.
nest_asyncio.apply()

# Recompute the 3-shot paths so this cell can be run on its own.
OUT_DIR = os.path.join("chapter-4", "results", "pred-sql-embedding-sft-experiments", "3-shot")
PROMPTS_FILE, RESPONSES_FILE, RESULTS_FILE = (
    os.path.join(OUT_DIR, file_name)
    for file_name in ("pred-sql-embedding-sft-prompts-3.json",
                      "pred-sql-embedding-sft-responses-3.txt",
                      "pred-sql-embedding-sft-results-3.json")
)

# Evaluate the saved responses against the Spider dev gold queries.
results = evaluate_spider(
    gold="benchmarks/spider/dev_gold.sql",
    pred=RESPONSES_FILE,
    db="benchmarks/spider/databases",
    table="benchmarks/spider/tables.json",
)

save_results(PROMPTS_FILE, RESPONSES_FILE, RESULTS_FILE, results)

100%|██████████| 1/1 [00:00<00:00, 198.85it/s]
100%|██████████| 1/1 [00:00<00:00, 333.25it/s]
100%|██████████| 1/1 [00:00<00:00, 206.10it/s]
100%|██████████| 1/1 [00:00<00:00, 132.48it/s]
100%|██████████| 1/1 [00:00<00:00, 250.44it/s]
100%|██████████| 1/1 [00:00<00:00, 199.79it/s]
  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 248.77it/s]
100%|██████████| 1/1 [00:00<00:00, 179.07it/s]
100%|██████████| 1/1 [00:00<00:00, 166.27it/s]
100%|██████████| 1/1 [00:00<00:00, 166.70it/s]
100%|██████████| 1/1 [00:00<00:00, 249.79it/s]
100%|██████████| 1/1 [00:00<00:00, 333.73it/s]
100%|██████████| 1/1 [00:00<00:00, 333.44it/s]
100%|██████████| 1/1 [00:00<00:00, 249.99it/s]
100%|██████████| 1/1 [00:00<00:00, 250.17it/s]
  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 273.62it/s]
100%|██████████| 1/1 [00:00<00:00, 124.97it/s]
100%|██████████| 1/1 [00:00<00:00, 199.90it/s]
100%|██████████| 1/1 [00:00<00:00, 142.39it/s]
100%|██████████| 1/1 [00:00<00:

                     easy                 medium               hard                 extra                all                 
count                248                  446                  174                  166                  1034                
execution            0.895                0.839                0.724                0.518                0.781               


## Pred-SQL Fine-Tuned Embedding Selector Spider 5-Shot Experiment

In [14]:
# prompt_factory returns a callable: configure it for the 5-shot EMBEDPRED
# setting first, then instantiate it on the loaded data.
prompt_builder = prompt_factory(
    k_shot=5,
    repr_type="SQL",
    example_format="QA",
    selector_type="EMBEDPRED",
    embedding_model=EMBEDDING,
)
prompt = prompt_builder(data=data, tokenizer="gpt-3.5-turbo")

In [15]:
# Build one 5-Shot GPT-3.5-Turbo Text-to-SQL prompt per Spider test question.
# The index handed to prompt.format is 1-based; tqdm shows progress.
questions = [
    prompt.format(index=position,
                  target=test_question,
                  max_seq_len=2048,
                  max_ans_len=200,
                  scope_factor=1,
                  cross_domain=False)
    for position, test_question in enumerate(tqdm(data.get_test_json()), start=1)
]

100%|██████████| 1034/1034 [06:02<00:00,  2.85it/s]


In [16]:
# Parameters describing the Pred-SQL embedding selector 5-shot run; they are
# stored alongside the formatted questions in the prompts file.
args = {
    "data_type": "spider",
    "split": "test",
    "tokenizer": "gpt-3.5-turbo",
    "max_seq_length": 2048,
    "prompt_repr": "SQL",
    "k-shot": 5,
    "example_type": "QA",
    "selector_type": "EMBEDPRED",
    "embedding_model": EMBEDDING
}

# Task object written to disk: the run parameters plus every formatted question.
task = {
    "args": args,
    "questions": questions
}

OUT_DIR = os.path.join("chapter-4", "results", "pred-sql-embedding-sft-experiments", "5-shot")
os.makedirs(OUT_DIR, exist_ok=True)

PROMPTS_FILE = os.path.join(OUT_DIR, "pred-sql-embedding-sft-prompts-5.json")

# Write through a context manager so the file is flushed and closed before any
# later cell reads it; a bare open() passed to json.dump is never closed
# explicitly.
with open(PROMPTS_FILE, "w") as prompts_fh:
    json.dump(task, prompts_fh, indent=4)

In [17]:
# Recompute the 5-shot paths so this cell can be run on its own, then hand the
# prompts file, the responses path and the model name to run_llm.
OUT_DIR = os.path.join("chapter-4", "results", "pred-sql-embedding-sft-experiments", "5-shot")
PROMPTS_FILE, RESPONSES_FILE = (
    os.path.join(OUT_DIR, file_name)
    for file_name in ("pred-sql-embedding-sft-prompts-5.json",
                      "pred-sql-embedding-sft-responses-5.txt")
)

run_llm(PROMPTS_FILE, RESPONSES_FILE, model="gpt-3.5-turbo")

100%|██████████| 1034/1034 [25:20<00:00,  1.47s/it] 


In [18]:
# Patch asyncio so nested event loops are allowed.
# NOTE(review): presumably required because evaluate_spider uses asyncio while
# the notebook's own loop is already running - confirm.
nest_asyncio.apply()

# Recompute the 5-shot paths so this cell can be run on its own.
OUT_DIR = os.path.join("chapter-4", "results", "pred-sql-embedding-sft-experiments", "5-shot")
PROMPTS_FILE, RESPONSES_FILE, RESULTS_FILE = (
    os.path.join(OUT_DIR, file_name)
    for file_name in ("pred-sql-embedding-sft-prompts-5.json",
                      "pred-sql-embedding-sft-responses-5.txt",
                      "pred-sql-embedding-sft-results-5.json")
)

# Evaluate the saved responses against the Spider dev gold queries.
results = evaluate_spider(
    gold="benchmarks/spider/dev_gold.sql",
    pred=RESPONSES_FILE,
    db="benchmarks/spider/databases",
    table="benchmarks/spider/tables.json",
)

save_results(PROMPTS_FILE, RESPONSES_FILE, RESULTS_FILE, results)

100%|██████████| 1/1 [00:00<00:00, 250.18it/s]
100%|██████████| 1/1 [00:00<00:00, 250.48it/s]
100%|██████████| 1/1 [00:00<00:00, 250.05it/s]
100%|██████████| 1/1 [00:00<00:00, 333.20it/s]
100%|██████████| 1/1 [00:00<00:00, 170.99it/s]
100%|██████████| 1/1 [00:00<00:00, 159.57it/s]
  0%|          | 0/1 [00:00<?, ?it/s]
  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 499.50it/s]
100%|██████████| 1/1 [00:00<00:00, 250.26it/s]
100%|██████████| 1/1 [00:00<00:00, 332.91it/s]
100%|██████████| 1/1 [00:00<00:00, 249.88it/s]
100%|██████████| 1/1 [00:00<00:00, 250.95it/s]
100%|██████████| 1/1 [00:00<00:00, 200.05it/s]
100%|██████████| 1/1 [00:00<00:00, 186.18it/s]
100%|██████████| 1/1 [00:00<00:00, 277.97it/s]
  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 205.26it/s]
100%|██████████| 1/1 [00:00<00:00, 325.77it/s]
100%|██████████| 1/1 [00:00<00:00, 148.39it/s]
100%|██████████| 1/1 [00:00<00:00, 41.67it/s]
100%|██████████| 1/1 [00:00<00:00, 250.11

                     easy                 medium               hard                 extra                all                 
count                248                  446                  174                  166                  1034                
execution            0.907                0.841                0.707                0.524                0.783               
