# Import Libraries

In [1]:
import os

import pandas as pd
from dotenv import load_dotenv

from text_to_sql import (
    TextToSQL,
    Config,
    LLMConfig,
    SLConfig,
    ContextConfig,
    QueryConfig,
)




# Load Environment

In [2]:
# Load environment settings before the config cell reads them via os.getenv
# (API_KEY, DB_HOST, DB_DATABASE, DB_USER, DB_PASSWORD, DB_PORT).
# The call is the cell's last expression, so its return value is displayed.
load_dotenv()

True

# Config

In [3]:
# Keyword arguments shared by every LLM-backed stage (rewriter, query generator,
# schema linker): all three use the same API model, provider and key.
llm_kwargs = dict(
    type="api",
    model="gemini-1.5-flash",
    provider="gemini",
    api_key=os.getenv("API_KEY"),
)

# Database connection settings, each read from the environment.
# NOTE(review): os.getenv returns a string (or None when the variable is unset),
# so `port` is passed as text — confirm QueryConfig accepts that.
db_kwargs = {
    field: os.getenv(env_name)
    for field, env_name in (
        ("host", "DB_HOST"),
        ("database", "DB_DATABASE"),
        ("user", "DB_USER"),
        ("password", "DB_PASSWORD"),
        ("port", "DB_PORT"),
    )
}

# One Config object bundling the per-stage configurations; each LLM stage gets
# its own config instance built from the shared keyword arguments.
config = Config(
    rewriter_config=LLMConfig(**llm_kwargs),
    query_generator_config=LLMConfig(**llm_kwargs),
    schema_linker_config=SLConfig(**llm_kwargs, schema_path="./metadata/sakila.json"),
    retrieve_context_config=ContextConfig(data_path="./dataset/dataset_sakila.csv"),
    query_executor_config=QueryConfig(**db_kwargs),
)

# Model

In [4]:
# Build the text-to-SQL pipeline object from `config`; its `generate` and
# `evaluate` methods are what the experiment loop calls for each question.
text_to_sql_model = TextToSQL(config=config)

Initializing API client for gemini using model gemini-1.5-flash.
Initializing API client for gemini using model gemini-1.5-flash.
Initializing API client for gemini using model gemini-1.5-flash.


  from google.protobuf import service as _service


# Experiment

In [5]:
# Load the Sakila question/answer CSV and keep only the rows that have a
# non-null "Summary" value; only those rows take part in the experiment.
# (presumably rows without a summary are not yet curated — confirm.)
# The unfiltered frame stays available as `dataset_raw`, and the filtered frame
# keeps the original row labels (the index is not reset).
dataset_raw = pd.read_csv("./dataset/dataset_sakila.csv")
dataset = dataset_raw[dataset_raw["Summary"].notna()]

In [6]:
# Show the filtered rows; as the cell's last expression the frame is rendered
# as the cell output.
dataset

Unnamed: 0,Question,Answer,Summary,Alternative Prompt 1,Alternative Prompt 2
0,Which actors have the first name ‘Scarlett’,SELECT * FROM actor WHERE first_name = 'Scarle...,This SQL query retrieves all records from the ...,Can you give me actors that have first name Sc...,I want to know actors that have the first name...
1,How many distinct actors last names are there?,SELECT COUNT(DISTINCT last_name) FROM actor;,This SQL query calculates the count of distinc...,,
2,Which actor has appeared in the most films?,"SELECT \r\n a.actor_id, \r\n a.first_nam...",This SQL query identifies the actor who has ap...,,
3,List the top five genres in gross revenue in d...,"SELECT \r\n c.name AS genre, \r\n SUM(p....",This SQL query calculates the top 5 highest-gr...,,
4,"Write a query to display how much business, in...","SELECT s.store_id, SUM(p.amount) AS total_reve...",This SQL query calculates the total revenue ge...,,
5,Which language is used in most films?,SELECT \n l.name\nFROM \n language l\n ...,This SQL query identifies the most frequently ...,,
6,List the top five customers in number of rente...,"SELECT\n c.customer_id,\n c.first_name,\...",This SQL query identifies the top 5 customers ...,,
7,Which customers have rented films from more th...,"SELECT \r\n c.customer_id, \r\n c.first_...",This SQL query identifies customers who have r...,,
8,Which films have never been rented out? Show t...,"SELECT \r\n f.film_id, \r\n f.title, \r\...",This SQL query identifies films that have neve...,,
9,What is the total revenue generated by each ac...,"SELECT \r\n a.actor_id, \r\n a.first_nam...",This SQL query calculates the total revenue ge...,,


In [7]:
# Execution-accuracy experiment: for every row of `dataset`, generate a SQL
# query from the question, score it against the reference query, and report
# the mean score over all questions.
EA = 0  # running sum of the per-question scores
total_questions = len(dataset)

# Number the questions by position rather than by index label: `dataset` keeps
# its original row labels after the notna() filter, so `label + 1` would skip
# numbers (or fail for non-integer labels) whenever a row has been dropped.
for question_number, (_, row) in enumerate(dataset.iterrows(), start=1):
    question = row["Question"]
    answer = row["Answer"]

    print(f"\nProcessing Question {question_number}: {question}")

    # Generate SQL query
    result = text_to_sql_model.generate(user_prompt=question)
    print(f"Generated SQL Query: {result}")

    # Execute and evaluate query; the score is summed and formatted as a
    # float, so `evaluate` is expected to return a number.
    acc = text_to_sql_model.evaluate(query=result, true_query=answer)
    print(f"Execution Accuracy: {acc:.4f}")

    EA += acc

# Guard against an empty dataset so the average never divides by zero.
final_accuracy = EA / total_questions if total_questions > 0 else 0
print(f"\nFinal Execution Accuracy: {final_accuracy:.4f}")


Processing Question 1: Which actors have the first name ‘Scarlett’
Rewritten Prompt: Retrieve actors with the first name 'Scarlett'.

Related Tables: {'inventory', 'film', 'store', 'language', 'address', 'actor', 'film_category', 'city', 'film_actor', 'country', 'staff', 'category'}
Generated SQL Query: SELECT * FROM actor WHERE first_name = 'Scarlett';
Execution Accuracy: 1.0000

Processing Question 2: How many distinct actors last names are there?
Rewritten Prompt: Retrieve the count of unique actor last names.

Related Tables: {'inventory', 'film', 'store', 'language', 'address', 'actor', 'film_category', 'city', 'film_actor', 'country', 'staff', 'category'}
Generated SQL Query: SELECT COUNT(DISTINCT last_name) FROM actor;
Execution Accuracy: 1.0000

Processing Question 3: Which actor has appeared in the most films?
Rewritten Prompt: Retrieve the actor who has appeared in the maximum number of films.

Related Tables: {'inventory', 'film', 'store', 'language', 'address', 'actor', 'f