# Validação dos Resultados - C3

In [4]:

from dotenv import load_dotenv
import os
import sys
import json
import pandas as pd
import time
import datetime
load_dotenv()

# Relative location of the repository root as seen from this experiment folder.
# Built from os.pardir instead of the literal '..\..': "\." is an invalid
# escape sequence (SyntaxWarning) and a backslash separator only works on Windows.
experiment_path = os.path.join(os.pardir, os.pardir)

path = os.path.abspath('')
module_path = os.path.join(path, experiment_path)
print(module_path)

# Make <root>/functions importable (presumably where dataset_utils lives).
# The membership test checks the exact entry that gets appended, so re-running
# this cell no longer adds duplicate entries to sys.path.
functions_path = os.path.join(module_path, "functions")
if functions_path not in sys.path:
    sys.path.append(functions_path)


from dataset_utils import DatasetEvaluator

c:\Users\Tahsin\Desktop\Uni\COSC\Directed Studies\TextToSQL\experiments\C3\..\..


  experiment_path = '..\..'


In [5]:
# Folder that is walked to discover the experiment's JSON result files.
DEFAULT_PATH = "results/"
# Experiment identifier; used in the CSV file names and in the experiment labels.
EXPERIMENT = "C3"
# CSV destination for the evaluation summary (presumably the non-FK runs; not used in the visible cells).
TARGET_PATH = f"evaluator_results/{EXPERIMENT}_results.csv"
# CSV destination for the summary of the foreign-key schema runs.
TARGET_PATH_FK = f"evaluator_results/{EXPERIMENT}_foreign_key_results.csv"

Funções úteis

In [6]:
def save_queries(queries, FILE_NAME_PATH):
    """Write ``queries`` to ``FILE_NAME_PATH`` as ``{"queries": [...]}`` JSON.

    Args:
        queries: JSON-serialisable list of query records.
        FILE_NAME_PATH: Destination file; it is overwritten.

    Raises:
        TypeError: If ``queries`` contains a value the json module cannot
            serialise. The destination file is left untouched in that case.
    """
    # Serialise BEFORE opening the file: opening with "w" truncates it, and the
    # evaluation step writes back over its own input file, so a serialisation
    # failure must not destroy the existing results.
    serialized = json.dumps({"queries": queries}, indent=4)
    # Explicit UTF-8 to match read_queries instead of the platform default.
    with open(FILE_NAME_PATH, "w", encoding="utf-8") as arquivo_json:
        arquivo_json.write(serialized)

def read_queries(FILE_NAME_PATH):
    """Return the list stored under the "queries" key of a JSON result file.

    The file is decoded as UTF-8 with undecodable bytes dropped, and parsed
    with ``strict=False`` so control characters inside strings are accepted.
    """
    with open(FILE_NAME_PATH, encoding="utf-8", errors="ignore") as source:
        raw_text = source.read()
    return json.loads(raw_text, strict=False)["queries"]

def get_schema(FILE_NAME_PATH):
    """Infer the schema name from a result file path.

    Returns "mondial_gpt" or "mondial" depending on which marker appears in
    the path, or an empty string when neither does.
    """
    # The more specific marker comes first: "mondial" is a substring of "mondial_gpt".
    for candidate in ("mondial_gpt", "mondial"):
        if candidate in FILE_NAME_PATH:
            return candidate
    return ""

def get_count_query_type(queries):
    """Count the queries per value of their "type" field.

    Returns a dict mapping each type seen to its number of occurrences, plus
    a 'total' entry holding ``len(queries)``.
    """
    tally = {}
    for entry in queries:
        kind = entry["type"]
        tally[kind] = tally.get(kind, 0) + 1
    tally['total'] = len(queries)
    return tally

def get_dataset_evaluator(FILE_NAME_PATH):
    """Create the DatasetEvaluator matching the schema named in a result file path.

    The schema comes from ``get_schema``. Both the dataset-folder prefix and
    the schema are printed before the evaluator is constructed.
    """
    SCHEMA = get_schema(FILE_NAME_PATH)
    # The "mondial_gpt" schema uses the dataset files stored under "mondial";
    # only the connection file and the dataset name keep the full schema name.
    PREFIX = "mondial" if SCHEMA == "mondial_gpt" else SCHEMA
    print(PREFIX)
    print(SCHEMA)
    return DatasetEvaluator(
        dataset_file_path=f"../../datasets/{PREFIX}/{PREFIX}_dataset.json",
        dataset_tables_path=f"../../datasets/{PREFIX}/result_tables/",
        db_connection_file=f"../../datasets/{SCHEMA}_db_connection.json",
        dataset_name=SCHEMA,
    )

def get_files_in_folder():
    """List the result files found under DEFAULT_PATH, sorted by path.

    A file is kept when its name contains '.json' and does not contain 'erro'.
    Sub-folders are searched recursively.
    """
    matches = [
        os.path.join(folder, name)
        for folder, _subfolders, names in os.walk(DEFAULT_PATH)
        for name in names
        if '.json' in name and 'erro' not in name
    ]
    matches.sort()
    return matches

def evaluate_query(queries, dataset_eval, FILE_NAME_PATH):
    """Evaluate every query record and tally the matches per difficulty type.

    Each record is updated in place with 'similarity', 'result' and, when the
    evaluator returned normally, 'column_matching_index'. The updated records
    are then written back to ``FILE_NAME_PATH`` through ``save_queries``.

    Returns:
        dict with the number of matching queries under 'simple', 'medium',
        'complex' and 'total'.
    """
    hits = dict.fromkeys(('simple', 'medium', 'complex', 'total'), 0)

    for entry in queries:
        matched = False
        entry['similarity'] = 0
        try:
            # Records with a blank query string are never sent to the evaluator.
            if entry["query_string"].strip() != "":
                matched, score, col_index = dataset_eval.evaluate_dataset_query(
                    entry["query_string"], entry["id"], query_type="sql"
                )
                entry['column_matching_index'] = col_index
                entry['similarity'] = score
        except Exception as err:
            # Best effort: a failing query is reported and counted as a non-match.
            print(str(err))

        entry['result'] = matched

        if matched == True:
            hits['total'] += 1
            hits[entry["type"]] += 1

    save_queries(queries, FILE_NAME_PATH)
    return hits

def get_file_name(FILE_NAME_PATH):
    """Return the file name of ``FILE_NAME_PATH`` without folders or ".json".

    Both "/" and "\\" are treated as folder separators. Paths built with
    os.path.join on Windows contain backslashes, and splitting on "/" alone
    would leave the folder name attached to the file name.
    """
    normalized = FILE_NAME_PATH.replace("\\", "/")
    last_element = normalized.rsplit("/", 1)[-1]
    return last_element.replace(".json", "")

def get_experiment_name(filename):
    """Build a human-readable experiment label from markers in ``filename``.

    The label starts with "<EXPERIMENT> - <SCHEMA>" and gains one suffix per
    recognised marker: model, turbo, prompt representation and FK.
    """
    schema = get_schema(filename).upper()
    pieces = [f"{EXPERIMENT} - {schema}"]

    # Model markers are mutually exclusive; the first one found wins.
    model_suffixes = (
        ("chatgpt4", " - GPT-4"),
        ("chatgpt_", " - GPT-3.5"),
        ("llama", " - LLAMA"),
    )
    for marker, suffix in model_suffixes:
        if marker in filename:
            pieces.append(suffix)
            break

    if "turbo" in filename:
        pieces.append(" - Turbo")

    if "instances" in filename:
        pieces.append(" - Passing Database Rows")
    elif "code_representation" in filename:
        pieces.append(" - Code Representation")

    if "_fk" in filename:
        pieces.append(" - FK")

    return "".join(pieces)

def compute_cost(filename):
    """Sum the token usage and cost recorded in a result file.

    Every record's optional 'token_usage' mapping is visited and the
    'total_tokens', 'total_cost', 'prompt_tokens' and 'completion_tokens'
    fields of each usage entry are added up (missing fields count as 0).
    The token total and the rounded cost are also printed.

    Returns:
        (total_tokens, total_cost rounded to 2 decimals, input_tokens, output_tokens)
    """
    tokens_sum = 0
    cost_sum = 0
    prompt_sum = 0
    completion_sum = 0

    for entry in read_queries(filename):
        usage = entry.get('token_usage', None)
        if usage is None:
            continue
        for usage_record in usage.values():
            tokens_sum += usage_record.get('total_tokens', 0)
            cost_sum += usage_record.get('total_cost', 0)
            prompt_sum += usage_record.get('prompt_tokens', 0)
            completion_sum += usage_record.get('completion_tokens', 0)

    rounded_cost = round(cost_sum, 2)
    print(tokens_sum)
    print(rounded_cost)

    return tokens_sum, rounded_cost, prompt_sum, completion_sum

def compute_cost_specific(filename, specific_trackin):
    """Sum a single usage field over all records of a result file.

    ``specific_trackin`` is the key looked up in every entry of each record's
    'token_usage' mapping; entries lacking the key contribute 0.
    """
    accumulated = 0
    for entry in read_queries(filename):
        usage = entry.get('token_usage', None)
        if usage is None:
            continue
        for usage_record in usage.values():
            accumulated += usage_record.get(specific_trackin, 0)
    return accumulated

def compute_time(filename):
    """Return the summed 'time' of all records, formatted as a timedelta string.

    Records without a 'time' field count as 0. The raw total is printed, then
    rounded to whole seconds before formatting.
    """
    elapsed_seconds = 0
    for entry in read_queries(filename):
        elapsed_seconds += entry.get('time', 0)
    print(elapsed_seconds)
    duration = datetime.timedelta(seconds=round(elapsed_seconds))
    return str(duration)

In [7]:
def run(result_files=None, results=None, compute_costs=True):
    """Evaluate result files and collect one summary row per file.

    Args:
        result_files: Paths of the JSON result files to evaluate. When omitted
            or empty, every file returned by ``get_files_in_folder`` is used.
            Files are processed in sorted order.
        results: Optional list to append the summary rows to. A fresh list is
            created when omitted.
        compute_costs: When true, token and cost totals are read from each
            file; otherwise those fields are reported as 0.

    Returns:
        The ``results`` list, with one dict appended per evaluated file.
    """
    # None sentinels instead of mutable [] defaults: a default list is created
    # once and shared between calls, so rows from earlier runs would leak into
    # later ones.
    if results is None:
        results = []

    if not result_files:
        result_files = get_files_in_folder()
    else:
        result_files = sorted(result_files)

    for file in result_files:
        queries = read_queries(file)
        dataset_eval = get_dataset_evaluator(file)
        count_query_type = get_count_query_type(queries)
        result = evaluate_query(queries, dataset_eval, file)

        overall_by_type = {}
        for item in result:
            # A file may contain no query of a given type (or no query at all);
            # report a ratio of 0.0 instead of failing on a missing key or a
            # division by zero.
            expected = count_query_type.get(item, 0)
            ratio = round(result[item] / expected, 4) if expected else 0.0
            overall_by_type[f"overall_{item}"] = ratio

        result['experiment'] = get_file_name(file)
        result = {**result, **overall_by_type}

        # NOTE(review): 'average_tokens_by_query' is only ever set to this placeholder.
        result['total_tokens'], result['total_cost'], result['average_tokens_by_query'] = 0, 0, 0
        if compute_costs:
            result['total_tokens'], result['total_cost'], result['input_tokens'], result['output_tokens'] = compute_cost(file)
        else:
            # Keep the row schema identical whether or not costs are computed.
            result['input_tokens'], result['output_tokens'] = 0, 0
        result['total_time'] = compute_time(file)
        results.append(result)
        # NOTE(review): fixed pause between files; its purpose is not visible
        # here (presumably throttling the database) - confirm before removing.
        time.sleep(5)
    return results


In [10]:
# Token and cost totals for one result file; the cell displays
# (total_tokens, total_cost, input_tokens, output_tokens).
compute_cost('results/5_c3_queries_chatgpt_mondial_gpt_fk.json')

626143
1.16


(626143, 1.16, 177696, 448447)

In [11]:
# Sum of 'prompt_tokens' over every usage entry recorded in this result file.
compute_cost_specific('results/5_c3_queries_chatgpt_mondial_gpt_fk.json', 'prompt_tokens')

177696

In [12]:
# Sum of 'completion_tokens' over every usage entry recorded in this result file.
compute_cost_specific('results/5_c3_queries_chatgpt_mondial_gpt_fk.json', 'completion_tokens')

448447

# Validação com os Esquemas com Foreign Keys

In [13]:
# Evaluate the two foreign-key result files; an explicit empty list is passed
# as the accumulator for the summary rows.
results_fk = run(
    result_files=[
        'results/5_c3_queries_chatgpt_mondial_gpt_fk.json',
        'results/6_c3_queries_chatgpt4_mondial_gpt_fk.json',
    ],
    results=[],
)

mondial
mondial_gpt
{'DB_HOST': 'localhost', 'DB_PORT': '1522', 'DB_USER_NAME': 'MONDIAL_GPT', 'DB_PASS': 'TextDB123', 'DB_NAME': '', 'SQL_DRIVER': 'oracle+oracledb', 'SERVICE_NAME': 'xepdb1', 'SCHEMA': 'MONDIAL_GPT', 'KEYWORD_SEARCH_API_URL': ''}
SELECT max(area) FROM mondial_country where name  = 'Thailand'


  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sq

SELECT area FROM country WHERE name = 'Thailand';
True cols: Index(['MAX(AREA)'], dtype='object')
Adj. True cols: Index(['MAX(AREA)'], dtype='object')
Predicted cols: Index(['AREA'], dtype='object')
Adj. Predicted cols: Index(['AREA'], dtype='object')
True: query matches sql (Similarity: 1.0, column match: 1.0)
SELECT name FROM mondial_province where area > 10000
SELECT name FROM province WHERE area > 10000;
True cols: Index(['NAME'], dtype='object')
Adj. True cols: Index(['NAME'], dtype='object')
Predicted cols: Index(['NAME'], dtype='object')
Adj. Predicted cols: Index(['NAME'], dtype='object')
True: query matches sql (Similarity: 1.0, column match: 1.0)
SELECT mondial_language.name FROM mondial_language INNER JOIN mondial_country ON mondial_language.country = mondial_country.code WHERE mondial_country.name = 'Poland'
SELECT name FROM language WHERE country = 'POL';
True cols: Index(['NAME'], dtype='object')
Adj. True cols: Index([], dtype='object')
Predicted cols: Index(['NAME'], dt

  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sq

Error: Execution failed on sql 'SELECT c.name, COUNT(b.country2) AS num_neighbors FROM country c JOIN borders b ON c.code = b.country1 WHERE c.continent = 'Africa' GROUP BY c.name ORDER BY num_neighbors DESC': ORA-00904: "C"."CONTINENT": invalid identifier
Help: https://docs.oracle.com/error-help/db/ora-00904/
SELECT c.name AS continent, AVG(p.infant_mortality) AS avg_infant_mortality
FROM mondial_continent c
JOIN mondial_encompasses e ON c.name = e.continent
JOIN mondial_population p ON e.country = p.country
GROUP BY c.name;
SELECT continent, AVG(infant_mortality) FROM population JOIN country ON population.country = country.code JOIN province ON province.country = country.code GROUP BY continent;
Error: Execution failed on sql 'SELECT continent, AVG(infant_mortality) FROM population JOIN country ON population.country = country.code JOIN province ON province.country = country.code GROUP BY continent': ORA-00904: "CONTINENT": invalid identifier
Help: https://docs.oracle.com/error-help/d

  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sq

SELECT country.name FROM country JOIN ismember ON country.code = ismember.country GROUP BY country.name HAVING COUNT(DISTINCT ismember.organization) = 1;
True cols: Index(['NAME'], dtype='object')
Adj. True cols: Index(['NAME'], dtype='object')
Predicted cols: Index(['NAME'], dtype='object')
Adj. Predicted cols: Index(['NAME'], dtype='object')
True: query matches sql (Similarity: 1.0, column match: 1.0)
SELECT c.name
FROM mondial_country c
WHERE c.code NOT IN (SELECT m.country
FROM mondial_ismember m
WHERE organization='NATO')
SELECT name FROM country WHERE code NOT IN (SELECT country FROM ismember WHERE type = 'NATO');
True cols: Index(['NAME'], dtype='object')
Adj. True cols: Index([], dtype='object')
Predicted cols: Index(['NAME'], dtype='object')
Adj. Predicted cols: Index([], dtype='object')
False: no match found
SELECT c.name
FROM mondial_country c
JOIN mondial_ismember m
ON c.code=m.country
WHERE organization='NATO'
SELECT country.name FROM country JOIN ismember ON country.code 

  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sq

SELECT city.name, country.name FROM city JOIN country ON city.country = country.code WHERE city.population > 5000000 ORDER BY city.population DESC;
True cols: Index(['CITYNAME', 'COUNTRYNAME'], dtype='object')
Adj. True cols: Index([], dtype='object')
Predicted cols: Index(['NAME', 'NAME'], dtype='object')
Adj. Predicted cols: Index([], dtype='object')
False: no match found
SELECT d.name FROM mondial_desert d 
INNER JOIN mondial_geo_desert gd ON gd.desert = d.name 
WHERE gd.province='Algeria'
SELECT desert.name FROM desert JOIN geo_desert ON desert.name = geo_desert.desert JOIN province ON geo_desert.province = province.name JOIN country ON province.country = country.code WHERE country.name = 'Algeria';
True cols: Index(['NAME'], dtype='object')
Adj. True cols: Index(['NAME'], dtype='object')
Predicted cols: Index(['NAME'], dtype='object')
Adj. Predicted cols: Index(['NAME'], dtype='object')
True: query matches sql (Similarity: 1.0, column match: 1.0)
SELECT d.name, d.area FROM mondial

  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)


SELECT inflation FROM economy WHERE country = 'Germany';
True cols: Index(['INFLATION'], dtype='object')
Adj. True cols: Index([], dtype='object')
Predicted cols: Index(['INFLATION'], dtype='object')
Adj. Predicted cols: Index([], dtype='object')
False: no match found
SELECT p.government
FROM mondial_country c
INNER JOIN mondial_politics p
ON p.country = c.code
WHERE c.name = 'Iran'
SELECT government FROM politics WHERE country = 'IRN';
True cols: Index(['GOVERNMENT'], dtype='object')
Adj. True cols: Index([], dtype='object')
Predicted cols: Index(['GOVERNMENT'], dtype='object')
Adj. Predicted cols: Index([], dtype='object')
False: no match found
SELECT e.industry 
FROM mondial_economy e
INNER JOIN mondial_country c
ON c.code = e.country
WHERE c.name = 'Japan'
SELECT (industry / (agriculture + industry + service)) * 100 AS industry_percentage FROM economy WHERE country = 'JPN';
True cols: Index(['INDUSTRY'], dtype='object')
Adj. True cols: Index([], dtype='object')
Predicted cols: Inde

  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sq

SELECT country.name, SUM(countrypops.population) AS total_population FROM country JOIN borders ON country.code = borders.country1 JOIN countrypops ON borders.country2 = countrypops.country GROUP BY country.name;
True cols: Index(['COUNTRY_NAME', 'TOTAL_NEIGHBOR_POPULATION'], dtype='object')
Adj. True cols: Index(['COUNTRY_NAME'], dtype='object')
Predicted cols: Index(['NAME', 'TOTAL_POPULATION'], dtype='object')
Adj. Predicted cols: Index(['NAME'], dtype='object')
False: no match found
SELECT mountain FROM mondial_mountainonisland WHERE island LIKE '%Borneo%'
SELECT mountain FROM mountainonisland WHERE island = 'Borneo';
True cols: Index(['MOUNTAIN'], dtype='object')
Adj. True cols: Index(['MOUNTAIN'], dtype='object')
Predicted cols: Index(['MOUNTAIN'], dtype='object')
Adj. Predicted cols: Index(['MOUNTAIN'], dtype='object')
True: query matches sql (Similarity: 1.0, column match: 1.0)
SELECT name, population FROM mondial_country ORDER BY area ASC FETCH FIRST 1 ROWS ONLY;
SELECT name, p

  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sq

SELECT capital FROM province WHERE name = 'Andalucía';
True cols: Index(['CAPITAL'], dtype='object')
Adj. True cols: Index([], dtype='object')
Predicted cols: Index(['CAPITAL'], dtype='object')
Adj. Predicted cols: Index([], dtype='object')
False: no match found
SELECT abbreviation FROM mondial_organization WHERE name='General Confederation of Trade Unions'
SELECT abbreviation FROM organization WHERE name = 'General Confederation of Trade Unions';
True cols: Index(['ABBREVIATION'], dtype='object')
Adj. True cols: Index(['ABBREVIATION'], dtype='object')
Predicted cols: Index(['ABBREVIATION'], dtype='object')
Adj. Predicted cols: Index(['ABBREVIATION'], dtype='object')
True: query matches sql (Similarity: 1.0, column match: 1.0)
SELECT city FROM mondial_organization WHERE abbreviation='UPU'
SELECT city FROM organization WHERE abbreviation = 'UPU';
True cols: Index(['CITY'], dtype='object')
Adj. True cols: Index(['CITY'], dtype='object')
Predicted cols: Index(['CITY'], dtype='object')
Adj

  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sql(SQL_query, con=self.db_connection)
  result_df = pd.read_sq

SELECT city.name, city.country FROM city WHERE city.population > 5000000 ORDER BY city.population DESC;
True cols: Index(['CITYNAME', 'COUNTRYNAME'], dtype='object')
Adj. True cols: Index(['CITYNAME'], dtype='object')
Predicted cols: Index(['NAME', 'COUNTRY'], dtype='object')
Adj. Predicted cols: Index(['NAME'], dtype='object')
False: no match found
SELECT d.name FROM mondial_desert d 
INNER JOIN mondial_geo_desert gd ON gd.desert = d.name 
WHERE gd.province='Algeria'
SELECT name FROM desert WHERE name IN ( SELECT desert FROM geo_desert WHERE province IN ( SELECT name FROM province WHERE country = ( SELECT code FROM country WHERE name = 'Algeria' ) ) );
True cols: Index(['NAME'], dtype='object')
Adj. True cols: Index(['NAME'], dtype='object')
Predicted cols: Index(['NAME'], dtype='object')
Adj. Predicted cols: Index(['NAME'], dtype='object')
True: query matches sql (Similarity: 1.0, column match: 1.0)
SELECT d.name, d.area FROM mondial_desert d 
INNER JOIN mondial_geo_desert gd ON gd.d

In [14]:
# Summary table of the FK runs: one row per result file, labelled with a
# readable experiment name and ordered from best to worst overall accuracy.
df_fk = (
    pd.DataFrame(
        results_fk,
        columns=[
            'experiment',
            'simple', 'medium', 'complex', 'total',
            'overall_simple', 'overall_medium', 'overall_complex', 'overall_total',
            'input_tokens', 'output_tokens', 'total_tokens',
            'total_cost', 'total_time',
        ],
    )
    .assign(experiment=lambda frame: frame['experiment'].apply(get_experiment_name))
    .sort_values(by='overall_total', ascending=False)
)
df_fk

Unnamed: 0,experiment,simple,medium,complex,total,overall_simple,overall_medium,overall_complex,overall_total,input_tokens,output_tokens,total_tokens,total_cost,total_time
1,C3 - MONDIAL_GPT - GPT-4 - FK,28,25,24,77,0.8485,0.7576,0.7059,0.77,153705,426937,580642,30.23,1:20:06
0,C3 - MONDIAL_GPT - GPT-3.5 - FK,25,12,9,46,0.7576,0.3636,0.2647,0.46,177696,448447,626143,1.16,2:44:42


In [23]:
# Persist the FK summary table to the CSV path configured in TARGET_PATH_FK.
df_fk.to_csv(TARGET_PATH_FK)  