# Prompt Gold Standard Gen

**Purpose:** Generate gold standard answers for each prompt

---
**Copyright (c) 2025 Michael Powers**

In [1]:
import os
import logging
import json
import pandas as pd
from datetime import datetime
import google.generativeai as genai
import time
import random

In [2]:
gemini_model = 'gemini-2.5-flash-lite-preview-06-17'
api_key="YOUR_API_KEY"
DEBUG = False

In [3]:
def ask_gemini_json(prompt, use_json=True, model='models/gemini-2.0-flash-lite'):
    import os
    import google.generativeai as genai
    if DEBUG:
        print(f'Prompt: {prompt}')
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel(model)
    if use_json:
        generation_config = genai.GenerationConfig(response_mime_type="application/json")
        response = model.generate_content(prompt, generation_config=generation_config)
    else:
        response = model.generate_content(prompt)
    return response.text

In [4]:
def clean_response(response):
    response = response.replace("<think>", "").replace("</think>", "")
    response = response.strip()
    if response.startswith("```json") and response.endswith("```"):
        response = response[len("```json"): -len("```")].strip()
    elif response.startswith("```") and response.endswith("```"):
        response = response[len("```"): -len("```")].strip()
    if response.lower().startswith('sql'):
        response = response[3:].strip()
    return response

In [5]:
def read_string(filename):
    with open(filename) as f:
        return f.read()

In [6]:
def get_prompt(goal, sql, json_format):
    
    prompt = f"""
"You are an expert SQL parser and prompt evaluator. Your task is to extract specific information from a full SQL query based on a given 'goal'. This extracted information will serve as the gold standard for evaluating a prompt designed to achieve that specific goal within a RAG-to-SQL system.

Inputs:

1. Goal: A clear description of the specific information to be extracted from the SQL query. Examples:

"Identify the needed tables and columns."

"Identify the filtering conditions (WHERE clause)."

"Identify the grouping criteria (GROUP BY clause)."

"Identify the aggregation functions and their columns (e.g., SUM, COUNT, AVG)."

"Identify the ordering criteria (ORDER BY clause)."

"Identify the JOIN conditions and types."

"Identify the subqueries and their purpose."

"Identify the aliasing used for tables or columns."

"Identify the HAVING clause conditions."

2. Full Gold Standard SQL Answer: The complete, correct SQL query.

Output:

Based on the provided 'Goal', extract ONLY the relevant information from the 'Full Gold Standard SQL Answer'. Present the extracted information clearly and concisely. If the information relevant to the goal is not present in the SQL query, state 'Not applicable' or 'None found'.

Examples:

Example 1:

Goal: Identify the needed tables and columns.

Full Gold Standard SQL Answer: SELECT Customers.CustomerID, Customers.CustomerName, Orders.OrderDate FROM Customers JOIN Orders ON Customers.CustomerID = Orders.CustomerID WHERE Orders.OrderDate > '2023-01-01' GROUP BY Customers.CustomerID ORDER BY Customers.CustomerName;

Output:

Tables: Customers, Orders

Columns: Customers.CustomerID, Customers.CustomerName, Orders.OrderDate

Example 2:

Goal: Identify the filtering conditions (WHERE clause).

Full Gold Standard SQL Answer: SELECT ProductName, Price FROM Products WHERE Price > 50 AND Category = 'Electronics';

Output:

Filtering Conditions: Price > 50 AND Category = 'Electronics'

Example 3:

Goal: Identify the grouping criteria (GROUP BY clause).

Full Gold Standard SQL Answer: SELECT Department, COUNT(EmployeeID) FROM Employees GROUP BY Department HAVING COUNT(EmployeeID) > 10;

Output:

Grouping Criteria: Department

Example 4:

Goal: Identify the aggregation functions and their columns.

Full Gold Standard SQL Answer: SELECT AVG(Salary), MAX(HireDate) FROM Employees;

Output:

Aggregation Functions and Columns: AVG(Salary), MAX(HireDate)

Example 5:

Goal: Identify the subqueries and their purpose.

Full Gold Standard SQL Answer: SELECT CustomerName FROM Customers WHERE CustomerID IN (SELECT CustomerID FROM Orders WHERE TotalAmount > 1000);

Output:

Subquery: (SELECT CustomerID FROM Orders WHERE TotalAmount > 1000)

Purpose: To find CustomerIDs of customers who have orders with a total amount greater than 1000, which are then used to filter the main Customers table.

#########
Goal:
{goal}

Gold Standard SQL:
{sql}

Target JSON output format:
```json
{json_format}
```
"""
    return prompt


In [7]:
def process_prompt(goal_string, json_format, test_directory="./results/",
                                gold_directory="./gold/",
                                output_filepath="./results.csv",
                                model='models/gemini-2.0-flash-lite',
                                rpm_limit=15):


    RPM_LIMIT = rpm_limit
    MAX_RETRIES = 5
    BASE_SLEEP_TIME = 8.5
    df_rows = []
    i = 0
    print(f"Starting generation.")
    #Loop through all files in test_directory
    for item in os.listdir(test_directory):
        item_path = os.path.join(test_directory, item)
        if os.path.isfile(item_path):
            i+=1
            test_filename = item_path
            gold_filename = os.path.join(gold_directory, item)
            test_contents = read_string(test_filename)
            gold_contents = read_string(gold_filename)

            
            prompt = get_prompt(goal_string, gold_contents, json_format)
            
            # Counter for API calls made within the current minute
            requests_in_minute = 0
            start_time_minute = time.time()
            retries = 0
        
            while retries < MAX_RETRIES:
                # Check RPM limit
                current_time = time.time()
                if current_time - start_time_minute >= 60:
                    requests_in_minute = 0
                    start_time_minute = current_time

                if requests_in_minute >= RPM_LIMIT:
                    wait_time = 60 - (current_time - start_time_minute)
                    print(f"Rate limit hit. Waiting for {wait_time:.2f} seconds...")
                    time.sleep(wait_time + 1)
                    requests_in_minute = 0
                    start_time_minute = time.time()

                try: # LLM CALL
                    print(f'Making LLM Call: {i}')
                    response = ask_gemini_json(prompt, use_json=True, model=model)
                    requests_in_minute += 1
                
                    # Clean the response string
                    response = clean_response(response)
                    if DEBUG:
                        print(f'Response: {response}')

                    # CONVERT OR SAVE OR WHATEVER WE ARE DOING GOES HERE
                    #print('!!!!!!!!')
                    try:
                        data = json.loads(response)
                        df_rows.append({
                                "ID" : item,
                                "JSON" : response,
                            })
                        
                       
                    except Exception as e:
                        print(f'Error decoding json response: {e}')
                        print("-------DATA NO GOOD")
                        
                    break # Success, break out of retry loop
                except Exception as e:
                    retries += 1
                    sleep_duration = BASE_SLEEP_TIME * (2 ** (retries - 1)) + random.uniform(0, 1)
                    print(f"API Error : {e}. Retrying in {sleep_duration:.2f}s... (Attempt {retries}/{MAX_RETRIES})")
                    time.sleep(sleep_duration)

                if retries == MAX_RETRIES:
                    print(f"Failed to process review from input line {i} after {MAX_RETRIES} retries. Skipping.")
                    print("-------DATA NO GOOD")
    df = pd.DataFrame(df_rows)
    print(df)
    df.to_csv(output_filepath, index=False)
    return df
    print("------DONE-----")

In [8]:
# Tabels and columns
# Grouping
# Calculations
# Filtering
# NEW->Join conditions and types

#Store as JSON



In [9]:
# JUST so we know which files matter
test_directory = "../../spider/Spider2-main/spider2-lite/evaluation_suite/gemini_pro_baseline_submission/"
gold_directory = "../../spider/Spider2-main/spider2-lite/evaluation_suite/gold/sql/"

output_filepath = "./subquestions-joins.csv"
goal_string = "Identify any joins in the Gold Standard."
json_format = """
Output as a JSON object with 'joins' (list of 'join' strings) and 'reasoning' (string).
"""



process_prompt(goal_string, json_format, test_directory, gold_directory,output_filepath,gemini_model)
print('-------DONE----')

Starting generation.
Making LLM Call: 1
Making LLM Call: 2
Making LLM Call: 3
Making LLM Call: 4
Making LLM Call: 5
Making LLM Call: 6
Making LLM Call: 7
Making LLM Call: 8
Making LLM Call: 9
Making LLM Call: 10
Making LLM Call: 11
Making LLM Call: 12
Making LLM Call: 13
Making LLM Call: 14
Making LLM Call: 15
Making LLM Call: 16
Making LLM Call: 17
Making LLM Call: 18
Making LLM Call: 19
Making LLM Call: 20
Making LLM Call: 21
Making LLM Call: 22
Making LLM Call: 23
Making LLM Call: 24
              ID                                               JSON
0   local301.sql  {\n"joins": [],\n"reasoning": "The provided SQ...
1   local075.sql  {\n"joins": [\n"JOIN shopping_cart_events AS t...
2   local058.sql  {\n  "joins": [\n    "hardware_fact_sales_mont...
3   local065.sql  {\n"joins": [\n"JOIN pizza_clean_runner_orders...
4   local099.sql  {\n  "joins": [\n    "M_Cast MC JOIN M_Directo...
5   local066.sql  {\n"joins": [\n"LEFT JOIN split_regular_toppin...
6   local029.sql  {\n"joins": [\