In [2]:
import os
import json
import nest_asyncio
from tqdm import tqdm
from utils.prompts.prompt_builder import prompt_factory
from utils.data.data_builder import load_data
from utils.llm.ask_llm import run_llm
from third_party.spider_eval.evaluation import evaluate_spider
from utils.data.post_process import save_results
from utils.prompts.prompt_builder import get_openai_key

from utils.utils import mask_query, jaccard_similarity, sql_similarity

# Path handed to load_data below; presumably the directory that contains the
# benchmark datasets (relative to the notebook's working directory) — TODO confirm.
path_data = "benchmarks"

# Load the "spider" dataset through the project's load_data helper.
# NOTE(review): `data` is not referenced by any example cell visible in this
# notebook — confirm it is still needed before keeping this load.
data = load_data("spider", path_data)

In [11]:
# EXAMPLE 1
# Baseline case: each pair consists of two identical strings.

# Masked query skeletons compared with jaccard_similarity.
dail1, dail2 = (
    "select count ( _ ) from _",
    "select count ( _ ) from _",
)
print("DAILSim:", jaccard_similarity(dail1, dail2))

# SQL strings compared with sql_similarity.
sql1, sql2 = (
    "SELECT count(*) FROM table1",
    "SELECT count(*) FROM table1",
)
print("SQLSim:", sql_similarity(sql1, sql2))

DAILSim: 1.0
SQLSim: 1.0


In [10]:
# EXAMPLE 2
# Same clauses in both queries; only the order of the two SELECT items is swapped.

# Masked query skeletons compared with jaccard_similarity.
dail1, dail2 = (
    "select _ , count ( _ ) from _ group by _",
    "select count ( _ ) , _ from _ group by _",
)
print("DAILSim:", jaccard_similarity(dail1, dail2))

# SQL strings compared with sql_similarity.
sql1, sql2 = (
    "SELECT col1 , count(*) FROM table1 GROUP BY col1",
    "SELECT count(*) , col1 FROM table1 GROUP BY col1",
)
print("SQLSim:", sql_similarity(sql1, sql2))

DAILSim: 1.0
SQLSim: 0.9307692307692308


In [9]:
# EXAMPLE 3
# NOTE(review): dail1 is an ORDER BY ... DESC skeleton while sql1 is a bare
# count(*) query, so the skeleton pair and the SQL pair do not appear to
# describe the same two queries — confirm this pairing is intended.

# Masked query skeletons compared with jaccard_similarity.
dail1, dail2 = (
    "select _ from _ order by _ desc",
    "select _ from _ group by _",
)
print("DAILSim:", jaccard_similarity(dail1, dail2))

# SQL strings compared with sql_similarity.
sql1, sql2 = (
    "SELECT count(*) FROM table1",
    "SELECT col1 FROM table1 GROUP BY col1 HAVING count(*) >= num",
)
print("SQLSim:", sql_similarity(sql1, sql2))

DAILSim: 0.6666666666666666
SQLSim: 0.39431818181818185


In [None]:
# EXAMPLE 4
# A query ending in ORDER BY ... DESC versus the same query without that clause.
#
# The skeletons use the masked, lowercase, space-separated form used by the
# other examples in this notebook, and each one mirrors the SQL string with
# the same index below (dail1 <-> sql1, dail2 <-> sql2), so both metrics are
# evaluated on the same pair of queries.

# Masked query skeletons compared with jaccard_similarity.
dail1 = "select _ from _ order by _ desc"
dail2 = "select _ from _"
print("DAILSim:", jaccard_similarity(dail1, dail2))

# SQL strings compared with sql_similarity.
sql1 = "SELECT col1 , col2 , col3 FROM table1 ORDER BY col3 DESC"
sql2 = "SELECT col1 , col2 , col3 FROM table1"
print("SQLSim:", sql_similarity(sql1, sql2))

In [13]:
# EXAMPLE 5
# The second query is the first one plus a single WHERE equality predicate.

# Masked query skeletons compared with jaccard_similarity.
dail1, dail2 = (
    "select count ( _ ) from _",
    "select count ( _ ) from _ where _ = _",
)
print("DAILSim:", jaccard_similarity(dail1, dail2))

# SQL strings compared with sql_similarity.
sql1, sql2 = (
    "SELECT count(*) FROM table1",
    "SELECT count(*) FROM table1 WHERE col2 = num",
)
print("SQLSim:", sql_similarity(sql1, sql2))

DAILSim: 0.6363636363636364
SQLSim: 0.6269230769230769


In [4]:
# EXAMPLE 6
# A bare count(*) query versus a much longer query.
# NOTE(review): dail2 contains the token "dec" (possibly meant to be "desc")
# and lists where / group by / limit clauses, whereas sql2 is a two-JOIN query
# with a WHERE ... AND filter — confirm the skeleton is the intended one.

# Masked query skeletons compared with jaccard_similarity.
dail1, dail2 = (
    "select count ( _ ) from _",
    "select count ( _ ) from _ where _ group by _ dec limit _",
)
print("DAILSim:", jaccard_similarity(dail1, dail2))

# SQL strings compared with sql_similarity. The long literal is split with
# implicit string concatenation; the resulting value is a single-line query.
sql1 = "SELECT count(*) FROM table1"
sql2 = (
    "SELECT count(*) FROM table1 AS alias1 "
    "JOIN table2 AS alias2 ON alias1.col1 = alias2.col1 "
    "JOIN table3 AS alias3 ON alias2.col2 = alias3.col2 "
    "WHERE alias1.col3 = str AND alias3.col4 = str"
)
print("SQLSim:", sql_similarity(sql1, sql2))

DAILSim: 0.4666666666666667
SQLSim: 0.24666666666666648


In [15]:
# EXAMPLE 7
# Three aggregate SELECT items versus plain column SELECT items; both queries
# carry a WHERE equality predicate.

# Masked query skeletons compared with jaccard_similarity.
dail1, dail2 = (
    "select avg ( _ ) , min ( _ ) , max ( _ ) from _ where _ = _",
    "select _ from _ where _ = _",
)
print("DAILSim:", jaccard_similarity(dail1, dail2))

# SQL strings compared with sql_similarity.
sql1, sql2 = (
    "SELECT avg(col1) , min(col1) , max(col1) FROM table1 WHERE col2 = str",
    "SELECT col1 , col2 , col3 FROM table1 WHERE col4 = str",
)
print("SQLSim:", sql_similarity(sql1, sql2))

DAILSim: 0.38095238095238093
SQLSim: 0.6421052631578947


In [16]:
# EXAMPLE 8
# A DISTINCT query with two JOINs and an OR filter versus a short
# single-table query with a ">" filter.

# Masked query skeletons compared with jaccard_similarity.
dail1, dail2 = (
    "select distinct _ from _ where _ or _",
    "select _ from _ where _ > _",
)
print("DAILSim:", jaccard_similarity(dail1, dail2))

# SQL strings compared with sql_similarity. The long literal is split with
# implicit string concatenation; the resulting value is a single-line query.
sql1 = (
    "SELECT DISTINCT alias1.col1 FROM table1 AS alias1 "
    "JOIN table2 AS alias2 on alias1.col2 = alias2.col2 "
    "JOIN table3 AS alias3 ON alias3.col3 = alias2.col3 "
    "WHERE alias3.col4 = str OR alias3.col4 = str"
)
sql2 = "SELECT col1 , col2 FROM table1 WHERE col3 > num"
print("SQLSim:", sql_similarity(sql1, sql2))

DAILSim: 0.7
SQLSim: 0.18171362852213915
