# text-embedding-3-large Spider Experiments

In [1]:
import os
import json
import nest_asyncio
from tqdm import tqdm
from utils.prompts.prompt_builder import prompt_factory
from utils.data.data_builder import load_data
from utils.llm.ask_llm import run_llm
from utils.data.post_process import save_results
from third_party.spider_eval.evaluation import evaluate_spider
from utils.prompts.prompt_builder import get_openai_key

# Embedding model name: forwarded to prompt_factory as `embedding_model` and
# recorded under "embedding_model" in the saved experiment args.
EMBEDDING = "text-embedding-3-large"

In [2]:
from openai import OpenAI

# Publish the project's OpenAI API key (configured in utils/parameters.py) as an
# environment variable to provide access to the OpenAI API
os.environ.update(OPENAI_API_KEY=get_openai_key())

# Build the OpenAI client
client = OpenAI()

In [3]:
# Load the Spider dataset.
# `data` is reused below: it is passed to each prompt factory and supplies the
# test questions via get_test_json().

# Path handed to load_data (presumably the root folder holding the benchmark files - confirm)
path_data = "benchmarks"

data = load_data("spider", path_data)

## One-Shot "text-embedding-3-large" Example Experiment

In [4]:
# Build the 1-shot prompt object: prompt_factory returns a callable for the chosen
# representation / example format / selector (Question Masked Euclidean Distance),
# which is then instantiated on the Spider data with the gpt-3.5-turbo tokenizer

prompt = prompt_factory(
    k_shot=1,
    repr_type="SQL",
    example_format="QA",
    selector_type="EUCDISQUESTIONMASK",
    embedding_model=EMBEDDING,
)(data=data, tokenizer="gpt-3.5-turbo")

Generating OpenAI embeddings... this may take a while.


100%|██████████| 8659/8659 [1:03:51<00:00,  2.26it/s]


In [5]:
# Build one 1-shot GPT-3.5-Turbo Text-to-SQL prompt per Spider test question

questions = []

# Indices passed to prompt.format are 1-based (enumerate starts at 1)
for idx, test_example in enumerate(tqdm(data.get_test_json()), start=1):
    questions.append(
        prompt.format(
            index=idx,
            target=test_example,
            max_seq_len=2048,
            max_ans_len=200,
            scope_factor=1,
            cross_domain=False,
        )
    )

100%|██████████| 1034/1034 [28:51<00:00,  1.68s/it] 


In [None]:
# Record the settings used to build the 1-shot prompts so they are stored with the questions
args = {
    "data_type": "spider",
    "split": "test",
    "tokenizer": "gpt-3.5-turbo",
    "max_seq_length": 2048,
    "prompt_repr": "SQL",
    "k-shot": 1,
    "example_type": "QA",
    "selector_type": "EUCDISQUESTIONMASK",
    "embedding_model": EMBEDDING
}

# Bundle the formatted questions with the experiment metadata
task = {
    "args": args,
    "questions": questions
}

# Save the 1-shot text-embedding-3-large Spider prompts to a .json file for processing
OUT_DIR = os.path.join("chapter-3", "results", "spider", "text-embedding-3-large-experiments", "1-shot")
os.makedirs(OUT_DIR, exist_ok=True)

PROMPTS_FILE = os.path.join(OUT_DIR, "text-embedding-3-large-prompts-1.json")

# Write through a context manager so the handle is flushed and closed right away;
# PROMPTS_FILE is handed to run_llm and save_results in later cells
with open(PROMPTS_FILE, "w") as prompts_fh:
    json.dump(task, prompts_fh, indent=4)

In [None]:
# Send the formatted 1-shot prompts to the GPT-3.5-TURBO model for response generation

OUT_DIR = os.path.join("chapter-3", "results", "spider", "text-embedding-3-large-experiments", "1-shot")

# Rebuild the prompt path here (as the evaluation cell does) so this cell does not depend on
# a PROMPTS_FILE left in the kernel by another cell - possibly one from a different k-shot section
PROMPTS_FILE = os.path.join(OUT_DIR, "text-embedding-3-large-prompts-1.json")
RESPONSES_FILE = os.path.join(OUT_DIR, "text-embedding-3-large-responses-1.txt")

run_llm(PROMPTS_FILE, RESPONSES_FILE, model="gpt-3.5-turbo")

In [4]:
# Score the 1-shot GPT-3.5-Turbo responses against the Spider gold queries and save the results

# All paths are rebuilt here, so this cell does not rely on path variables from earlier cells
OUT_DIR = os.path.join("chapter-3", "results", "spider", "text-embedding-3-large-experiments", "1-shot")

PROMPTS_FILE = os.path.join(OUT_DIR, "text-embedding-3-large-prompts-1.json")
RESPONSES_FILE = os.path.join(OUT_DIR, "text-embedding-3-large-responses-1.txt")
RESULTS_FILE = os.path.join(OUT_DIR, "text-embedding-3-large-results-1.json")

# Applied before the evaluation call (presumably needed because the evaluator runs
# asyncio code inside the notebook's already-running event loop - confirm)
nest_asyncio.apply()

results = evaluate_spider(
    gold="benchmarks/spider/dev_gold.sql",
    pred=RESPONSES_FILE,
    db="benchmarks/spider/databases",
    table="benchmarks/spider/tables.json",
)

save_results(PROMPTS_FILE, RESPONSES_FILE, RESULTS_FILE, results)

100%|██████████| 1/1 [00:00<00:00, 395.02it/s]
100%|██████████| 1/1 [00:00<00:00, 499.56it/s]
100%|██████████| 1/1 [00:00<00:00, 248.74it/s]
100%|██████████| 1/1 [00:00<00:00, 242.31it/s]
100%|██████████| 1/1 [00:00<00:00, 221.03it/s]
100%|██████████| 1/1 [00:00<00:00, 229.01it/s]
  0%|          | 0/1 [00:00<?, ?it/s]
  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 221.50it/s]
100%|██████████| 1/1 [00:00<00:00, 182.35it/s]
100%|██████████| 1/1 [00:00<00:00, 332.99it/s]
100%|██████████| 1/1 [00:00<00:00, 233.69it/s]
100%|██████████| 1/1 [00:00<00:00, 435.46it/s]
100%|██████████| 1/1 [00:00<00:00, 488.68it/s]
100%|██████████| 1/1 [00:00<00:00, 281.82it/s]
100%|██████████| 1/1 [00:00<00:00, 165.98it/s]
  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 198.39it/s]
100%|██████████| 1/1 [00:00<00:00, 249.96it/s]
100%|██████████| 1/1 [00:00<00:00, 333.36it/s]
100%|██████████| 1/1 [00:00<00:00, 310.48it/s]
100%|██████████| 1/1 [00:00<00:00, 132.4

                     easy                 medium               hard                 extra                all                 
count                248                  446                  174                  166                  1034                
execution            0.887                0.821                0.690                0.488                0.761               


## Three-Shot "text-embedding-3-large" Example Experiment

In [None]:
# Build the 3-shot prompt object: prompt_factory returns a callable for the chosen
# representation / example format / selector (Question Masked Euclidean Distance),
# which is then instantiated on the Spider data with the gpt-3.5-turbo tokenizer

prompt = prompt_factory(
    k_shot=3,
    repr_type="SQL",
    example_format="QA",
    selector_type="EUCDISQUESTIONMASK",
    embedding_model=EMBEDDING,
)(data=data, tokenizer="gpt-3.5-turbo")

In [None]:
# Format all Spider test questions for 3-Shot GPT-3.5-Turbo Text-to-SQL conversion

questions = []

# Indices passed to prompt.format are 1-based (enumerate starts at 1)
for i, question_json in enumerate(tqdm(data.get_test_json()), start=1):

    question_format = prompt.format(index=i,
                                    target=question_json,
                                    max_seq_len=2048,
                                    max_ans_len=200,
                                    scope_factor=1,
                                    cross_domain=False)

    questions.append(question_format)

In [None]:
# Record the settings used to build the 3-shot prompts so they are stored with the questions
args = {
    "data_type": "spider",
    "split": "test",
    "tokenizer": "gpt-3.5-turbo",
    "max_seq_length": 2048,
    "prompt_repr": "SQL",
    "k-shot": 3,
    "example_type": "QA",
    "selector_type": "EUCDISQUESTIONMASK",
    "embedding_model": EMBEDDING
}

# Bundle the formatted questions with the experiment metadata
task = {
    "args": args,
    "questions": questions
}

# Save the 3-shot text-embedding-3-large Spider prompts to a .json file for processing
OUT_DIR = os.path.join("chapter-3", "results", "spider", "text-embedding-3-large-experiments", "3-shot")
os.makedirs(OUT_DIR, exist_ok=True)

PROMPTS_FILE = os.path.join(OUT_DIR, "text-embedding-3-large-prompts-3.json")

# Write through a context manager so the handle is flushed and closed right away;
# PROMPTS_FILE is handed to run_llm and save_results in later cells
with open(PROMPTS_FILE, "w") as prompts_fh:
    json.dump(task, prompts_fh, indent=4)

In [None]:
# Send the formatted 3-shot prompts to the GPT-3.5-TURBO model for response generation

OUT_DIR = os.path.join("chapter-3", "results", "spider", "text-embedding-3-large-experiments", "3-shot")

# Rebuild the prompt path here (as the evaluation cell does) so this cell does not depend on
# a PROMPTS_FILE left in the kernel by another cell - possibly one from a different k-shot section
PROMPTS_FILE = os.path.join(OUT_DIR, "text-embedding-3-large-prompts-3.json")
RESPONSES_FILE = os.path.join(OUT_DIR, "text-embedding-3-large-responses-3.txt")

run_llm(PROMPTS_FILE, RESPONSES_FILE, model="gpt-3.5-turbo")

In [5]:
# Score the 3-shot GPT-3.5-Turbo responses against the Spider gold queries and save the results

# All paths are rebuilt here, so this cell does not rely on path variables from earlier cells
OUT_DIR = os.path.join("chapter-3", "results", "spider", "text-embedding-3-large-experiments", "3-shot")

PROMPTS_FILE = os.path.join(OUT_DIR, "text-embedding-3-large-prompts-3.json")
RESPONSES_FILE = os.path.join(OUT_DIR, "text-embedding-3-large-responses-3.txt")
RESULTS_FILE = os.path.join(OUT_DIR, "text-embedding-3-large-results-3.json")

# Applied before the evaluation call (presumably needed because the evaluator runs
# asyncio code inside the notebook's already-running event loop - confirm)
nest_asyncio.apply()

results = evaluate_spider(
    gold="benchmarks/spider/dev_gold.sql",
    pred=RESPONSES_FILE,
    db="benchmarks/spider/databases",
    table="benchmarks/spider/tables.json",
)

save_results(PROMPTS_FILE, RESPONSES_FILE, RESULTS_FILE, results)

100%|██████████| 1/1 [00:00<00:00, 153.53it/s]
100%|██████████| 1/1 [00:00<00:00, 180.36it/s]
100%|██████████| 1/1 [00:00<00:00, 161.03it/s]
100%|██████████| 1/1 [00:00<00:00, 249.57it/s]
100%|██████████| 1/1 [00:00<00:00, 284.84it/s]
100%|██████████| 1/1 [00:00<00:00, 332.91it/s]
  0%|          | 0/1 [00:00<?, ?it/s]
  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 236.90it/s]
100%|██████████| 1/1 [00:00<00:00, 333.60it/s]
100%|██████████| 1/1 [00:00<00:00, 250.05it/s]
100%|██████████| 1/1 [00:00<00:00, 220.51it/s]
100%|██████████| 1/1 [00:00<00:00, 283.67it/s]
100%|██████████| 1/1 [00:00<00:00, 250.09it/s]
100%|██████████| 1/1 [00:00<00:00, 249.78it/s]
100%|██████████| 1/1 [00:00<00:00, 220.90it/s]
  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 221.74it/s]
100%|██████████| 1/1 [00:00<00:00, 220.63it/s]
100%|██████████| 1/1 [00:00<00:00, 244.88it/s]
100%|██████████| 1/1 [00:00<00:00, 232.82it/s]
100%|██████████| 1/1 [00:00<00:00, 250.2

                     easy                 medium               hard                 extra                all                 
count                248                  446                  174                  166                  1034                
execution            0.907                0.839                0.718                0.476                0.777               


## Five-Shot "text-embedding-3-large" Example Experiment

In [None]:
# Build the 5-shot prompt object: prompt_factory returns a callable for the chosen
# representation / example format / selector (Question Masked Euclidean Distance),
# which is then instantiated on the Spider data with the gpt-3.5-turbo tokenizer

prompt = prompt_factory(
    k_shot=5,
    repr_type="SQL",
    example_format="QA",
    selector_type="EUCDISQUESTIONMASK",
    embedding_model=EMBEDDING,
)(data=data, tokenizer="gpt-3.5-turbo")

In [None]:
# Format all Spider test questions for 5-Shot GPT-3.5-Turbo Text-to-SQL conversion

questions = []

# Indices passed to prompt.format are 1-based (enumerate starts at 1)
for i, question_json in enumerate(tqdm(data.get_test_json()), start=1):

    question_format = prompt.format(index=i,
                                    target=question_json,
                                    max_seq_len=2048,
                                    max_ans_len=200,
                                    scope_factor=1,
                                    cross_domain=False)

    questions.append(question_format)

In [None]:
# Record the settings used to build the 5-shot prompts so they are stored with the questions
args = {
    "data_type": "spider",
    "split": "test",
    "tokenizer": "gpt-3.5-turbo",
    "max_seq_length": 2048,
    "prompt_repr": "SQL",
    "k-shot": 5,
    "example_type": "QA",
    "selector_type": "EUCDISQUESTIONMASK",
    "embedding_model": EMBEDDING
}

# Bundle the formatted questions with the experiment metadata
task = {
    "args": args,
    "questions": questions
}

# Save the 5-shot text-embedding-3-large Spider prompts to a .json file for processing
OUT_DIR = os.path.join("chapter-3", "results", "spider", "text-embedding-3-large-experiments", "5-shot")
os.makedirs(OUT_DIR, exist_ok=True)

PROMPTS_FILE = os.path.join(OUT_DIR, "text-embedding-3-large-prompts-5.json")

# Write through a context manager so the handle is flushed and closed right away;
# PROMPTS_FILE is handed to run_llm and save_results in later cells
with open(PROMPTS_FILE, "w") as prompts_fh:
    json.dump(task, prompts_fh, indent=4)

In [None]:
# Send the formatted 5-shot prompts to the GPT-3.5-TURBO model for response generation

OUT_DIR = os.path.join("chapter-3", "results", "spider", "text-embedding-3-large-experiments", "5-shot")

# Rebuild the prompt path here (as the evaluation cell does) so this cell does not depend on
# a PROMPTS_FILE left in the kernel by another cell - possibly one from a different k-shot section
PROMPTS_FILE = os.path.join(OUT_DIR, "text-embedding-3-large-prompts-5.json")
RESPONSES_FILE = os.path.join(OUT_DIR, "text-embedding-3-large-responses-5.txt")

run_llm(PROMPTS_FILE, RESPONSES_FILE, model="gpt-3.5-turbo")

In [6]:
# Score the 5-shot GPT-3.5-Turbo responses against the Spider gold queries and save the results

# All paths are rebuilt here, so this cell does not rely on path variables from earlier cells
OUT_DIR = os.path.join("chapter-3", "results", "spider", "text-embedding-3-large-experiments", "5-shot")

PROMPTS_FILE = os.path.join(OUT_DIR, "text-embedding-3-large-prompts-5.json")
RESPONSES_FILE = os.path.join(OUT_DIR, "text-embedding-3-large-responses-5.txt")
RESULTS_FILE = os.path.join(OUT_DIR, "text-embedding-3-large-results-5.json")

# Applied before the evaluation call (presumably needed because the evaluator runs
# asyncio code inside the notebook's already-running event loop - confirm)
nest_asyncio.apply()

results = evaluate_spider(
    gold="benchmarks/spider/dev_gold.sql",
    pred=RESPONSES_FILE,
    db="benchmarks/spider/databases",
    table="benchmarks/spider/tables.json",
)

save_results(PROMPTS_FILE, RESPONSES_FILE, RESULTS_FILE, results)

100%|██████████| 1/1 [00:00<00:00, 116.13it/s]
100%|██████████| 1/1 [00:00<00:00, 131.80it/s]
100%|██████████| 1/1 [00:00<00:00, 116.86it/s]
100%|██████████| 1/1 [00:00<00:00, 165.29it/s]
100%|██████████| 1/1 [00:00<00:00, 159.62it/s]
100%|██████████| 1/1 [00:00<00:00, 302.07it/s]
  0%|          | 0/1 [00:00<?, ?it/s]
  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 387.18it/s]
100%|██████████| 1/1 [00:00<00:00, 236.75it/s]
100%|██████████| 1/1 [00:00<00:00, 200.20it/s]
100%|██████████| 1/1 [00:00<00:00, 334.42it/s]
100%|██████████| 1/1 [00:00<00:00, 333.12it/s]
100%|██████████| 1/1 [00:00<00:00, 165.91it/s]
100%|██████████| 1/1 [00:00<00:00, 282.01it/s]
100%|██████████| 1/1 [00:00<00:00, 249.97it/s]
  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 333.68it/s]
100%|██████████| 1/1 [00:00<00:00, 250.35it/s]
100%|██████████| 1/1 [00:00<00:00, 249.32it/s]
100%|██████████| 1/1 [00:00<00:00, 250.44it/s]
100%|██████████| 1/1 [00:00<00:00, 222.2

                     easy                 medium               hard                 extra                all                 
count                248                  446                  174                  166                  1034                
execution            0.903                0.836                0.741                0.512                0.784               
