# Random Selection BIRD Experiments 

In [None]:
import os
import json
import multiprocessing
from tqdm import tqdm
from utils.prompts.prompt_builder import prompt_factory
from utils.data.data_builder import load_data
from utils.llm.ask_llm import run_llm
from third_party.bird_eval.evaluation import reformat_responses
from third_party.bird_eval.evaluation import evaluate_bird
from utils.data.post_process import save_results
from utils.prompts.prompt_builder import get_openai_key

# Embedding-model placeholder for the random-selection experiments.
# NOTE(review): this constant is not referenced by any visible cell (the
# experiments record "embedding_model": None directly) - confirm it is still needed.
EMBEDDING = None

In [1]:
from openai import OpenAI

# Publish the OpenAI API key (set in utils/parameters.py) as an environment variable
# so that code relying on OPENAI_API_KEY can authenticate.
os.environ.update({"OPENAI_API_KEY": get_openai_key()})

# Build the OpenAI client; no key is passed explicitly here.
client = OpenAI()

In [3]:
# Load the BIRD dataset

# Directory handed to load_data as the location of the benchmark files.
path_data = "benchmarks"

# `data` is shared by every experiment below: it is passed to each prompt
# factory and provides the test questions via get_test_json().
data = load_data("bird", path_data)

## One-Shot "random" Experiment

In [4]:
# Build the 1-shot prompt object: prompt_factory returns a callable configured for
# RANDOM example selection, which is then instantiated on the BIRD data.

prompt = prompt_factory(
    k_shot=1,
    repr_type="SQL",
    example_format="QA",
    selector_type="RANDOM",
)(
    data=data,
    tokenizer="gpt-3.5-turbo",
)

In [5]:
# Format all BIRD test questions for 1-shot GPT-3.5-Turbo Text-to-SQL conversion.
# Each formatted question is appended to `questions` in dataset order; the index
# passed to prompt.format is 1-based.
# NOTE(review): max_seq_len matches the "max_seq_length" value recorded in the saved
# args; the exact meaning of max_ans_len and scope_factor is defined by prompt.format.

questions = []

for i, question_json in enumerate(tqdm(data.get_test_json()), start=1):
    questions.append(
        prompt.format(
            index=i,
            target=question_json,
            max_seq_len=2048,
            max_ans_len=200,
            scope_factor=100,
        )
    )

100%|██████████| 1534/1534 [01:10<00:00, 21.75it/s]


In [6]:
# List the model parameters used across the 1-shot BIRD experiment
args = {
    "data_type": "bird",
    "split": "test",
    "tokenizer": "gpt-3.5-turbo",
    "max_seq_length": 2048,
    "prompt_repr": "SQL",
    "k-shot": 1,
    "example_type": "QA",
    "selector_type": "RANDOM",
    "embedding_model": None,
}

# Bundle the experiment metadata with the formatted questions so both are
# stored together in the prompts file
task = {
    "args": args,
    "questions": questions,
}

# Save the 1-shot random BIRD prompts to .json file for processing
OUT_DIR = os.path.join("chapter-3", "results", "bird", "random-experiments", "1-shot")
os.makedirs(OUT_DIR, exist_ok=True)

PROMPTS_FILE = os.path.join(OUT_DIR, "random-prompts-1.json")

# Use a context manager so the file is flushed and closed deterministically
# before later cells read it (json.dump emits ASCII by default, so the explicit
# encoding does not change the bytes written).
with open(PROMPTS_FILE, "w", encoding="utf-8") as prompts_fh:
    json.dump(task, prompts_fh, indent=4)

In [None]:
# Send the formatted 1-shot prompts to the GPT-3.5-TURBO model for response generation

OUT_DIR = os.path.join("chapter-3", "results", "bird", "random-experiments", "1-shot")

# Resolve the prompts path in this cell instead of inheriting PROMPTS_FILE from
# whichever prompt-saving cell ran last; otherwise an out-of-order run could send
# another experiment's prompts into the 1-shot responses file.
PROMPTS_FILE = os.path.join(OUT_DIR, "random-prompts-1.json")
RESPONSES_FILE = os.path.join(OUT_DIR, "random-responses-1.txt")

run_llm(PROMPTS_FILE, RESPONSES_FILE, model="gpt-3.5-turbo")

In [None]:
# Reformat the 1-shot GPT-3.5 responses file for use in the BIRD evaluation script

OUT_DIR = os.path.join("chapter-3", "results", "bird", "random-experiments", "1-shot")

# Resolve the prompts path in this cell so it cannot silently pick up a
# PROMPTS_FILE left behind by a different experiment's cell.
PROMPTS_FILE = os.path.join(OUT_DIR, "random-prompts-1.json")
RESPONSES_FILE = os.path.join(OUT_DIR, "random-responses-1.txt")

# Destination of the reformatted responses; the evaluation cell reads this file
OUT_FILE = os.path.join(OUT_DIR, "random-responses-1.json")

reformat_responses(PROMPTS_FILE, RESPONSES_FILE, OUT_FILE)

In [7]:
# Score the 1-shot GPT-3.5-Turbo responses against benchmarks/bird/dev.sql and
# write the results next to the prompts and responses.

OUT_DIR = os.path.join("chapter-3", "results", "bird", "random-experiments", "1-shot")

# All four artefacts of this experiment live in OUT_DIR
PROMPTS_FILE, RESPONSES_TXT, RESPONSES_JSON, RESULTS_FILE = (
    os.path.join(OUT_DIR, file_name)
    for file_name in (
        "random-prompts-1.json",
        "random-responses-1.txt",
        "random-responses-1.json",
        "random-results-1.json",
    )
)

# The evaluation is given one worker per CPU reported by the machine
results = evaluate_bird(
    gold="benchmarks/bird/dev.sql",
    pred=RESPONSES_JSON,
    db="benchmarks/bird/databases/",
    diff_json_path="benchmarks/bird/dev.json",
    data_mode="dev",
    num_cpus=multiprocessing.cpu_count(),
)

save_results(PROMPTS_FILE, RESPONSES_TXT, RESULTS_FILE, results)

Result received for query pair 4: {'sql_idx': 4, 'res': 0}
Result received for query pair 2: {'sql_idx': 2, 'res': 0}
Result received for query pair 9: {'sql_idx': 9, 'res': 0}
Result received for query pair 10: {'sql_idx': 10, 'res': 0}
Result received for query pair 7: {'sql_idx': 7, 'res': 1}
Result received for query pair 0: {'sql_idx': 0, 'res': 0}
Result received for query pair 12: {'sql_idx': 12, 'res': 0}
Result received for query pair 8: {'sql_idx': 8, 'res': 1}
Result received for query pair 15: {'sql_idx': 15, 'res': 0}
Result received for query pair 3: {'sql_idx': 3, 'res': 0}
Result received for query pair 17: {'sql_idx': 17, 'res': 1}
Result received for query pair 11: {'sql_idx': 11, 'res': 1}
Result received for query pair 19: {'sql_idx': 19, 'res': 0}
Result received for query pair 14: {'sql_idx': 14, 'res': 0}
Result received for query pair 1: {'sql_idx': 1, 'res': 0}
Result received for query pair 18: {'sql_idx': 18, 'res': 1}
Result received for query pair 22: {'sql

## 3-Shot "random" Experiment

In [8]:
# Build the 3-shot prompt object: prompt_factory returns a callable configured for
# RANDOM example selection (no embedding model), which is then instantiated on the BIRD data.

prompt = prompt_factory(
    k_shot=3,
    repr_type="SQL",
    example_format="QA",
    selector_type="RANDOM",
    embedding_model=None,
)(
    data=data,
    tokenizer="gpt-3.5-turbo",
)

In [9]:
# Format all BIRD test questions for 3-shot GPT-3.5-Turbo Text-to-SQL conversion.
# Each formatted question is appended to `questions` in dataset order; the index
# passed to prompt.format is 1-based.
# NOTE(review): max_seq_len matches the "max_seq_length" value recorded in the saved
# args; the exact meaning of max_ans_len and scope_factor is defined by prompt.format.

questions = []

for i, question_json in enumerate(tqdm(data.get_test_json()), start=1):
    questions.append(
        prompt.format(
            index=i,
            target=question_json,
            max_seq_len=2048,
            max_ans_len=200,
            scope_factor=100,
        )
    )

100%|██████████| 1534/1534 [03:40<00:00,  6.94it/s]


In [10]:
# List the model parameters used across the 3-shot BIRD experiment
args = {
    "data_type": "bird",
    "split": "test",
    "tokenizer": "gpt-3.5-turbo",
    "max_seq_length": 2048,
    "prompt_repr": "SQL",
    "k-shot": 3,
    "example_type": "QA",
    "selector_type": "RANDOM",
    "embedding_model": None,
}

# Bundle the experiment metadata with the formatted questions so both are
# stored together in the prompts file
task = {
    "args": args,
    "questions": questions,
}

# Save the 3-shot random BIRD prompts to .json file for processing
OUT_DIR = os.path.join("chapter-3", "results", "bird", "random-experiments", "3-shot")
os.makedirs(OUT_DIR, exist_ok=True)

PROMPTS_FILE = os.path.join(OUT_DIR, "random-prompts-3.json")

# Use a context manager so the file is flushed and closed deterministically
# before later cells read it (json.dump emits ASCII by default, so the explicit
# encoding does not change the bytes written).
with open(PROMPTS_FILE, "w", encoding="utf-8") as prompts_fh:
    json.dump(task, prompts_fh, indent=4)

In [None]:
# Send the formatted 3-shot prompts to the GPT-3.5-TURBO model for response generation

OUT_DIR = os.path.join("chapter-3", "results", "bird", "random-experiments", "3-shot")

# Resolve the prompts path in this cell instead of inheriting PROMPTS_FILE from
# whichever prompt-saving cell ran last; otherwise an out-of-order run could send
# another experiment's prompts into the 3-shot responses file.
PROMPTS_FILE = os.path.join(OUT_DIR, "random-prompts-3.json")
RESPONSES_FILE = os.path.join(OUT_DIR, "random-responses-3.txt")

run_llm(PROMPTS_FILE, RESPONSES_FILE, model="gpt-3.5-turbo")

In [None]:
# Reformat the 3-shot GPT-3.5 responses file for use in the BIRD evaluation script

OUT_DIR = os.path.join("chapter-3", "results", "bird", "random-experiments", "3-shot")

# Resolve the prompts path in this cell so it cannot silently pick up a
# PROMPTS_FILE left behind by a different experiment's cell.
PROMPTS_FILE = os.path.join(OUT_DIR, "random-prompts-3.json")
RESPONSES_FILE = os.path.join(OUT_DIR, "random-responses-3.txt")

# Destination of the reformatted responses; the evaluation cell reads this file
OUT_FILE = os.path.join(OUT_DIR, "random-responses-3.json")

reformat_responses(PROMPTS_FILE, RESPONSES_FILE, OUT_FILE)

In [11]:
# Score the 3-shot GPT-3.5-Turbo responses against benchmarks/bird/dev.sql and
# write the results next to the prompts and responses.

OUT_DIR = os.path.join("chapter-3", "results", "bird", "random-experiments", "3-shot")

# All four artefacts of this experiment live in OUT_DIR
PROMPTS_FILE, RESPONSES_TXT, RESPONSES_JSON, RESULTS_FILE = (
    os.path.join(OUT_DIR, file_name)
    for file_name in (
        "random-prompts-3.json",
        "random-responses-3.txt",
        "random-responses-3.json",
        "random-results-3.json",
    )
)

# The evaluation is given one worker per CPU reported by the machine
results = evaluate_bird(
    gold="benchmarks/bird/dev.sql",
    pred=RESPONSES_JSON,
    db="benchmarks/bird/databases/",
    diff_json_path="benchmarks/bird/dev.json",
    data_mode="dev",
    num_cpus=multiprocessing.cpu_count(),
)

save_results(PROMPTS_FILE, RESPONSES_TXT, RESULTS_FILE, results)

Result received for query pair 1: {'sql_idx': 1, 'res': 0}
Result received for query pair 5: {'sql_idx': 5, 'res': 0}
Result received for query pair 6: {'sql_idx': 6, 'res': 0}
Result received for query pair 10: {'sql_idx': 10, 'res': 0}
Result received for query pair 0: {'sql_idx': 0, 'res': 0}
Result received for query pair 12: {'sql_idx': 12, 'res': 0}
Result received for query pair 8: {'sql_idx': 8, 'res': 1}
Result received for query pair 7: {'sql_idx': 7, 'res': 1}
Result received for query pair 3: {'sql_idx': 3, 'res': 0}
Result received for query pair 15: {'sql_idx': 15, 'res': 0}
Result received for query pair 17: {'sql_idx': 17, 'res': 0}
Result received for query pair 11: {'sql_idx': 11, 'res': 1}
Result received for query pair 19: {'sql_idx': 19, 'res': 0}
Result received for query pair 13: {'sql_idx': 13, 'res': 0}
Result received for query pair 2: {'sql_idx': 2, 'res': 0}
Result received for query pair 18: {'sql_idx': 18, 'res': 1}
Result received for query pair 9: {'sql_

## 5-Shot "random" Experiment

In [12]:
# Build the 5-shot prompt object: prompt_factory returns a callable configured for
# RANDOM example selection (no embedding model), which is then instantiated on the BIRD data.

prompt = prompt_factory(
    k_shot=5,
    repr_type="SQL",
    example_format="QA",
    selector_type="RANDOM",
    embedding_model=None,
)(
    data=data,
    tokenizer="gpt-3.5-turbo",
)

In [13]:
# Format all BIRD test questions for 5-shot GPT-3.5-Turbo Text-to-SQL conversion.
# Each formatted question is appended to `questions` in dataset order; the index
# passed to prompt.format is 1-based.
# NOTE(review): max_seq_len matches the "max_seq_length" value recorded in the saved
# args; the exact meaning of max_ans_len and scope_factor is defined by prompt.format.

questions = []

for i, question_json in enumerate(tqdm(data.get_test_json()), start=1):
    questions.append(
        prompt.format(
            index=i,
            target=question_json,
            max_seq_len=2048,
            max_ans_len=200,
            scope_factor=100,
        )
    )

100%|██████████| 1534/1534 [05:57<00:00,  4.29it/s]


In [14]:
# List the model parameters used across the 5-shot BIRD experiment
args = {
    "data_type": "bird",
    "split": "test",
    "tokenizer": "gpt-3.5-turbo",
    "max_seq_length": 2048,
    "prompt_repr": "SQL",
    "k-shot": 5,
    "example_type": "QA",
    "selector_type": "RANDOM",
    "embedding_model": None,
}

# Bundle the experiment metadata with the formatted questions so both are
# stored together in the prompts file
task = {
    "args": args,
    "questions": questions,
}

# Save the 5-shot random BIRD prompts to .json file for processing
OUT_DIR = os.path.join("chapter-3", "results", "bird", "random-experiments", "5-shot")
os.makedirs(OUT_DIR, exist_ok=True)

PROMPTS_FILE = os.path.join(OUT_DIR, "random-prompts-5.json")

# Use a context manager so the file is flushed and closed deterministically
# before later cells read it (json.dump emits ASCII by default, so the explicit
# encoding does not change the bytes written).
with open(PROMPTS_FILE, "w", encoding="utf-8") as prompts_fh:
    json.dump(task, prompts_fh, indent=4)

In [None]:
# Send the formatted 5-shot prompts to the GPT-3.5-TURBO model for response generation

OUT_DIR = os.path.join("chapter-3", "results", "bird", "random-experiments", "5-shot")

# Resolve the prompts path in this cell instead of inheriting PROMPTS_FILE from
# whichever prompt-saving cell ran last; otherwise an out-of-order run could send
# another experiment's prompts into the 5-shot responses file.
PROMPTS_FILE = os.path.join(OUT_DIR, "random-prompts-5.json")
RESPONSES_FILE = os.path.join(OUT_DIR, "random-responses-5.txt")

run_llm(PROMPTS_FILE, RESPONSES_FILE, model="gpt-3.5-turbo")

In [None]:
# Reformat the 5-shot GPT-3.5 responses file for use in the BIRD evaluation script

OUT_DIR = os.path.join("chapter-3", "results", "bird", "random-experiments", "5-shot")

# Resolve the prompts path in this cell so it cannot silently pick up a
# PROMPTS_FILE left behind by a different experiment's cell.
PROMPTS_FILE = os.path.join(OUT_DIR, "random-prompts-5.json")
RESPONSES_FILE = os.path.join(OUT_DIR, "random-responses-5.txt")

# Destination of the reformatted responses; the evaluation cell reads this file
OUT_FILE = os.path.join(OUT_DIR, "random-responses-5.json")

reformat_responses(PROMPTS_FILE, RESPONSES_FILE, OUT_FILE)

In [15]:
# Score the 5-shot GPT-3.5-Turbo responses against benchmarks/bird/dev.sql and
# write the results next to the prompts and responses.

OUT_DIR = os.path.join("chapter-3", "results", "bird", "random-experiments", "5-shot")

# All four artefacts of this experiment live in OUT_DIR
PROMPTS_FILE, RESPONSES_TXT, RESPONSES_JSON, RESULTS_FILE = (
    os.path.join(OUT_DIR, file_name)
    for file_name in (
        "random-prompts-5.json",
        "random-responses-5.txt",
        "random-responses-5.json",
        "random-results-5.json",
    )
)

# The evaluation is given one worker per CPU reported by the machine
results = evaluate_bird(
    gold="benchmarks/bird/dev.sql",
    pred=RESPONSES_JSON,
    db="benchmarks/bird/databases/",
    diff_json_path="benchmarks/bird/dev.json",
    data_mode="dev",
    num_cpus=multiprocessing.cpu_count(),
)

save_results(PROMPTS_FILE, RESPONSES_TXT, RESULTS_FILE, results)

Result received for query pair 1: {'sql_idx': 1, 'res': 0}
Result received for query pair 5: {'sql_idx': 5, 'res': 0}
Result received for query pair 9: {'sql_idx': 9, 'res': 0}
Result received for query pair 10: {'sql_idx': 10, 'res': 0}
Result received for query pair 6: {'sql_idx': 6, 'res': 0}
Result received for query pair 7: {'sql_idx': 7, 'res': 1}
Result received for query pair 12: {'sql_idx': 12, 'res': 0}
Result received for query pair 0: {'sql_idx': 0, 'res': 0}
Result received for query pair 15: {'sql_idx': 15, 'res': 0}
Result received for query pair 3: {'sql_idx': 3, 'res': 0}
Result received for query pair 8: {'sql_idx': 8, 'res': 1}
Result received for query pair 17: {'sql_idx': 17, 'res': 1}
Result received for query pair 19: {'sql_idx': 19, 'res': 0}
Result received for query pair 11: {'sql_idx': 11, 'res': 1}
Result received for query pair 20: {'sql_idx': 20, 'res': 0}
Result received for query pair 22: {'sql_idx': 22, 'res': 0}
Result received for query pair 21: {'sql