# Zero-Shot Spider Experiment

In [None]:
# Perform a single Text-to-SQL conversion using GPT-3.5-Turbo. Define necessary imports.
import os
import json
import nest_asyncio
from tqdm import tqdm
from utils.prompts.prompt_builder import prompt_factory
from utils.data.data_builder import load_data
from utils.llm.ask_llm import run_llm
from utils.data.post_process import save_results
from third_party.spider_eval.evaluation import evaluate_spider
from utils.prompts.prompt_builder import get_openai_key

# Presumably a placeholder for an embedding model. It is not referenced by any
# cell visible in this section (the prompt factory call passes
# embedding_model=None directly).
# NOTE(review): confirm whether this constant is still needed.
EMBEDDING = None

In [None]:
from openai import OpenAI

# Export the key returned by get_openai_key() (configured in utils/parameters.py)
# as OPENAI_API_KEY so that the OpenAI API can be accessed.
os.environ["OPENAI_API_KEY"] = get_openai_key()

# Instantiate the OpenAI client
client = OpenAI()

In [None]:
# Load the Spider dataset from the local "benchmarks" directory
path_data = "benchmarks"
data = load_data("spider", path_data)

## Spider Baseline Zero-Shot Assessment (Prompt Construction)

In [None]:
# Build the prompt object that generates the LLM inputs for Spider.
# Default settings: zero-shot (k_shot=0), "SQL" code-representation (CR)
# organisation, and no example format, selector type, or embedding model.
prompt = prompt_factory(
    k_shot=0,
    repr_type="SQL",
    example_format=None,
    selector_type=None,
    embedding_model=None,
)(data=data, tokenizer="gpt-3.5-turbo")

In [None]:
# Format every Spider example returned by data.get_test_json() (the test split,
# not the training split) for GPT-3.5-Turbo conversion.
questions = []

# Use the default prompt.format() arguments demonstrated in the DAIL-SQL study.
# Examples are numbered from 1 (enumerate(..., start=1)).
for i, question_json in enumerate(tqdm(data.get_test_json()), start=1):
    question_format = prompt.format(
        index=i,
        target=question_json,
        max_seq_len=2048,
        max_ans_len=200,
        scope_factor=1,
    )
    questions.append(question_format)

In [None]:
# List the model parameters used across the experiment; they are stored next to
# the prompts as metadata.
args = {
    "data_type": "spider",
    "split": "test",
    "tokenizer": "gpt-3.5-turbo",
    "max_seq_length": 2048,
    "prompt_repr": "SQL",
    "k-shot": 0,
    "example_type": None,
    "selector_type": None,
}

# Define the task dictionary holding the formatted questions and the metadata
# of the Spider experiment
task = {
    "args": args,
    "questions": questions,
}

# Save the zero-shot Spider prompts to a .json file for processing
# (no embedding model is involved in the zero-shot setting).
OUT_DIR = os.path.join("chapter-3", "results", "spider", "zero-shot-experiments")
os.makedirs(OUT_DIR, exist_ok=True)

PROMPTS_FILE = os.path.join(OUT_DIR, "zero-shot-prompts.json")
# Use a context manager so the file is flushed and closed before any later
# cell reads it back; the previous bare open() left the handle to the GC.
with open(PROMPTS_FILE, "w", encoding="utf-8") as prompts_fh:
    json.dump(task, prompts_fh, indent=4)

## Spider Baseline Zero-Shot Assessment (SQL Conversion)

In [None]:
# Submit the saved prompts to GPT-3.5-Turbo for response generation.
# run_llm is given the prompts file, the responses file path, and the model name.
OUT_DIR = os.path.join("chapter-3", "results", "spider", "zero-shot-experiments")
RESPONSES_FILE = os.path.join(OUT_DIR, "zero-shot-responses.txt")

run_llm(PROMPTS_FILE, RESPONSES_FILE, model="gpt-3.5-turbo")

## Spider Baseline Zero-Shot Assessment (Answer Evaluation)

In [2]:
# NOTE(review): presumably required so the evaluation can run inside the
# notebook's already-running event loop — confirm.
nest_asyncio.apply()

# Paths of the experiment artefacts: prompts (input), responses (predictions),
# and the results file written by save_results.
OUT_DIR = os.path.join("chapter-3", "results", "spider", "zero-shot-experiments")
PROMPTS_FILE = os.path.join(OUT_DIR, "zero-shot-prompts.json")
RESPONSES_FILE = os.path.join(OUT_DIR, "zero-shot-responses.txt")
RESULTS_FILE = os.path.join(OUT_DIR, "zero-shot-results.json")

# Evaluate the predicted SQL against the Spider dev gold queries
results = evaluate_spider(
    gold="benchmarks/spider/dev_gold.sql",
    pred=RESPONSES_FILE,
    db="benchmarks/spider/databases",
    table="benchmarks/spider/tables.json",
)

# Store the evaluation results together with the prompts and responses paths
save_results(PROMPTS_FILE, RESPONSES_FILE, RESULTS_FILE, results)

100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 63.08it/s]
100%|██████████| 1/1 [00:00<00:00, 63.98it/s]
100%|██████████| 1/1 [00:00<00:00, 109.78it/s]
100%|██████████| 1/1 [00:00<00:00, 73.96it/s]
100%|██████████| 1/1 [00:00<00:00, 109.72it/s]
  0%|          | 0/1 [00:00<?, ?it/s]
  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 364.34it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 60.69it/s]
100%|██████████| 1/1 [00:00<00:00, 169.62it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<00:00, 1008.97it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [00:00<?, ?it/s]
  0%|          | 0/1 [00:00<?, ?it/s]
  0%|          | 0/1 [00:00<?, ?it/s]
100%|█████

                     easy                 medium               hard                 extra                all                 
count                248                  446                  174                  166                  1034                
execution            0.883                0.794                0.546                0.464                0.721               
