In [2]:
%load_ext autoreload
%autoreload 2

In [1]:
import json
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm
from pathlib import Path

proj_path = Path('.').resolve()

In [8]:
# Column prefixes of the six evaluated settings and the per-sample metric names.
test_cols = ['c_low', 'c_mid', 'c_high', 't_1',  't_2',  't_3+']
eval_cols = ['score', 's_sel', 's_cond', 's_agg', 's_nest', 's_oth']

df_train = pd.read_csv(proj_path / 'data' / 'split_in_domain' / 'train_origin.csv')
df = pd.read_csv(proj_path / 'experiments' / 'bo_evals' / 'all.csv')

# check hint: one boolean '<setting>_hint_exist' column per setting, True when the
# stored hint differs from the empty-hint template below.
no_hint_str = 'Descriptions and Virtual Tables:\n{}\n'
hint_cols = [f'{test_col}_hint' for test_col in test_cols]
df_hint = df[hint_cols].ne(no_hint_str).add_suffix('_exist')

# Append the flags, then drop every row that has a missing value in any column.
df = pd.concat([df, df_hint], axis=1).dropna()

In [11]:
# Number of rows in the training split for each `cate_len_tbls` category.
df_train.groupby(['cate_len_tbls']).size()

cate_len_tbls
1     3412
2     1783
3+     572
dtype: int64

In [12]:
# Number of rows in the training split for each `cate_gold_c` category.
df_train.groupby(['cate_gold_c']).size()

cate_gold_c
high    1434
low     2650
mid     1683
dtype: int64

In [58]:
# Number of rows in `df` for each `cate_len_tbls` category.
df.groupby(['cate_len_tbls']).size()

cate_len_tbls
1     857
2     903
3+    225
dtype: int64

# Execution Accuracy

In [82]:
# number of distinct db_id values found in `df` and `df_train` together
eval_db_ids = set(df['db_id'].unique())
train_db_ids = set(df_train['db_id'].unique())
len(eval_db_ids | train_db_ids)

160

In [86]:
# Pool the score total and the sample count of both frames, then derive the
# pooled mean score as a percentage.
eval_totals = df['score'].agg(['sum', 'count'])
train_totals = df_train['score'].agg(['sum', 'count'])
df_baseline = eval_totals.add(train_totals)
df_baseline['mean'] = (df_baseline['sum'] / df_baseline['count'] * 100).round(2)
df_baseline

sum      5767.00
count    7752.00
mean       74.39
Name: score, dtype: float64

In [75]:
# Score total ('sum') and sample count ('count') per `cate_gold_c` category,
# pooled over `df` and `df_train`.
eval_by_c = df.groupby('cate_gold_c')['score'].agg(['sum', 'count'])
train_by_c = df_train.groupby('cate_gold_c')['score'].agg(['sum', 'count'])
# fill_value=0 keeps a category that exists in only one of the two frames;
# a plain `+` would turn its totals into NaN.
df_baseline_c = eval_by_c.add(train_by_c, fill_value=0)
df_baseline_c['ex_acc'] = (df_baseline_c['sum'] / df_baseline_c['count'] * 100).round(2)
df_baseline_c.reindex(['low', 'mid', 'high'])

Unnamed: 0_level_0,sum,count,ex_acc
cate_gold_c,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
low,2650,3167,83.68
mid,1683,2357,71.4
high,1434,2228,64.36


In [74]:
# Score total ('sum') and sample count ('count') per `cate_len_tbls` category,
# pooled over `df` and `df_train`.
eval_by_t = df.groupby('cate_len_tbls')['score'].agg(['sum', 'count'])
train_by_t = df_train.groupby('cate_len_tbls')['score'].agg(['sum', 'count'])
# fill_value=0 keeps a category that exists in only one of the two frames;
# a plain `+` would turn its totals into NaN.
df_baseline_t = eval_by_t.add(train_by_t, fill_value=0)
df_baseline_t['ex_acc'] = (df_baseline_t['sum'] / df_baseline_t['count'] * 100).round(2)
df_baseline_t

Unnamed: 0_level_0,sum,count,ex_acc
cate_len_tbls,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,3412,4269,79.93
2,1783,2686,66.38
3+,572,797,71.77


In [30]:
# One row per evaluated setting: score total and sample count over `df`.
overall_score_cols = [f'{test_col}_score' for test_col in test_cols]
df_overall = df[overall_score_cols].agg(['sum', 'count']).T
# Every training row is added to both the score total and the sample count.
df_overall = df_overall + len(df_train)
df_overall = df_overall.assign(
    ex_acc=(df_overall['sum'] * 100 / df_overall['count']).round(2)
).astype({'sum': int, 'count': int})
df_overall

Unnamed: 0,sum,count,ex_acc
c_low_score,6101,7752,78.7
c_mid_score,6144,7752,79.26
c_high_score,6165,7752,79.53
t_1_score,6107,7752,78.78
t_2_score,6156,7752,79.41
t_3+_score,6000,7752,77.4


In [49]:
def get_acc(df: pd.DataFrame, df_train: pd.DataFrame, key_column: str, key_test_cols: list[str], key_sort_indexs: list[str]):
    """Per-category accuracy (in percent) of several settings over `df` plus `df_train`.

    For every setting ``t`` in `key_test_cols` the column ``f'{t}_score'`` of `df`
    is summed and counted per `key_column` category. Each training row of the same
    category is then added to both the sum and the count, i.e. it is treated as
    scoring 1.

    Training sizes are matched to the evaluation groups by category label, so the
    two frames do not need to contain the same categories. A category missing from
    one frame contributes 0 from that frame.

    Args:
        df: Evaluation frame holding `key_column` and the ``*_score`` columns.
        df_train: Training frame holding `key_column`.
        key_column: Name of the categorical column to group by.
        key_test_cols: Setting prefixes, e.g. ``['c_low', 'c_mid', 'c_high']``.
        key_sort_indexs: Category labels in the desired output order; labels that
            occur in neither frame yield NaN.

    Returns:
        DataFrame with one row per ``*_score`` column and one column per entry of
        `key_sort_indexs`, rounded to two decimals.
    """
    cols = [f'{test_col}_score' for test_col in key_test_cols]
    grouped = df.groupby(key_column)[cols]
    sums = grouped.sum()
    counts = grouped.count()

    # Align by label instead of by position: positional addition silently mixes up
    # categories whenever the two frames do not share the exact same group set.
    train_sizes = df_train.groupby(key_column).size()
    groups = counts.index.union(train_sizes.index)
    train_sizes = train_sizes.reindex(groups, fill_value=0)
    sums = sums.reindex(groups, fill_value=0).add(train_sizes, axis=0)
    counts = counts.reindex(groups, fill_value=0).add(train_sizes, axis=0)

    df_acc = (sums / counts * 100).round(2)
    return df_acc.reindex(key_sort_indexs).T

key_column = 'cate_gold_c'
key_test_cols = ['c_low', 'c_mid', 'c_high']
key_sort_indexs = ['low', 'mid', 'high']
df_acc_c = get_acc(df, df_train, key_column, key_test_cols, key_sort_indexs)
# Select the matching rows of df_overall by label rather than by position, so the
# 'overall' column does not depend on the order of the settings in `test_cols`.
overall_c = df_overall.loc[[f'{test_col}_score' for test_col in key_test_cols], ['ex_acc']]
df_acc_c = pd.concat([df_acc_c, overall_c.rename(columns={'ex_acc': 'overall'})], axis=1)
df_acc_c

Unnamed: 0,low,mid,high,overall
c_low_score,87.97,74.97,69.48,78.7
c_mid_score,86.2,79.17,69.48,79.26
c_high_score,86.11,76.41,73.47,79.53


In [51]:
key_column = 'cate_len_tbls'
key_test_cols = ['t_1', 't_2',  't_3+']
key_sort_indexs = ['1', '2', '3+']
df_acc_t = get_acc(df, df_train, key_column, key_test_cols, key_sort_indexs)
# Select the matching rows of df_overall by label rather than by position, so the
# 'overall' column does not depend on the order of the settings in `test_cols`.
overall_t = df_overall.loc[[f'{test_col}_score' for test_col in key_test_cols], ['ex_acc']]
df_acc_t = pd.concat([df_acc_t, overall_t.rename(columns={'ex_acc': 'overall'})], axis=1)
df_acc_t

Unnamed: 0,1,2,3+,overall
t_1_score,85.06,69.92,75.03,78.78
t_2_score,81.96,76.17,76.66,79.41
t_3+_score,81.26,70.18,81.05,77.4


In [53]:
# Put both accuracy tables side by side (each keeps its former index as a column)
# and store them in a single CSV.
acc_tables = [df_acc_c.reset_index(), df_acc_t.reset_index()]
acc_path = proj_path / 'experiments' / 'bo_evals' / 'acc.csv'
pd.concat(acc_tables, axis=1).to_csv(acc_path, index=False)

In [None]:
# groupby hints


In [10]:
# List every column currently present in `df`.
df.columns

Index(['sample_id', 'db_id', 'question', 'score', 'gold_sql', 'cate_len_tbls',
       'cate_gold_c', 'need_high|wrong', 'need_high|correct', 'need_mid|wrong',
       'need_mid|correct', 'need_low|wrong', 'need_low|correct',
       'need_1|wrong', 'need_1|correct', 'need_2|wrong', 'need_2|correct',
       'need_3+|wrong', 'need_3+|correct', 'c_low', 'c_low_hint', 'c_mid',
       'c_mid_hint', 'c_high', 'c_high_hint', 't_1', 't_1_hint', 't_2',
       't_2_hint', 't_3+', 't_3+_hint', 'c_low_score', 'c_low_s_sel',
       'c_low_s_cond', 'c_low_s_agg', 'c_low_s_nest', 'c_low_s_oth',
       'c_mid_score', 'c_mid_s_sel', 'c_mid_s_cond', 'c_mid_s_agg',
       'c_mid_s_nest', 'c_mid_s_oth', 'c_high_score', 'c_high_s_sel',
       'c_high_s_cond', 'c_high_s_agg', 'c_high_s_nest', 'c_high_s_oth',
       't_1_score', 't_1_s_sel', 't_1_s_cond', 't_1_s_agg', 't_1_s_nest',
       't_1_s_oth', 't_2_score', 't_2_s_sel', 't_2_s_cond', 't_2_s_agg',
       't_2_s_nest', 't_2_s_oth', 't_3+_score', 't_3+_s_s

# Process data

In [67]:
import sqlparse
from src.database import SqliteDatabase
from src.eval import result_eq, check_if_exists_orderby
from src.eval_complexity import eval_all
from src.process_sql import get_schema, Schema
from src.parsing_sql import (
    extract_selection, 
    extract_condition, 
    extract_aggregation, 
    extract_nested_setoperation, 
    extract_others,
    extract_aliases,
)

def error_check(proj_path, all_tasks):
    """Scan every task file and record the samples whose SQL cannot be parsed.

    Each line of ``proj_path / 'experiments' / f'{task}.jsonl'`` is decoded and both
    its gold and predicted SQL are pushed through the parsing helpers. Any exception
    marks the sample as an error sample; parsing of that sample stops at the first
    failing statement.

    Args:
        proj_path: Project root directory (a ``pathlib.Path``).
        all_tasks: Iterable of task names.

    Returns:
        dict with the keys
            'parsing_sql': list of ``(sample_id, 'gold' or 'pred', error message)``,
            'error_samples': set of sample ids with a parsing failure,
            'empty_hint': set of sample ids whose ``hint`` is the empty string,
            'pred_exec', 'result': lists that this function leaves empty.
    """
    error_infos = {
        'pred_exec': [],
        'result': [],
        'parsing_sql': [],
        'error_samples': set(),
        'empty_hint': set()
    }

    # filter parsing errors
    for task in all_tasks:
        with open(proj_path / 'experiments' / f'{task}.jsonl', 'r') as f:
            # tqdm advances by itself while iterating, so no manual update() is
            # needed (calling it as well counted every clean line twice).
            iterator = tqdm(f, desc=task)
            for line in iterator:
                x = json.loads(line)
                if x['hint'] == '':
                    error_infos['empty_hint'].add(x['sample_id'])
                schema = get_schema(str(proj_path / 'data' / 'spider' / 'database' / x['db_id'] / f'{x["db_id"]}.sqlite'))
                schema = Schema(schema)

                for s in ['gold', 'pred']:
                    try:
                        statement = sqlparse.parse(x[f'{s}_sql'].strip())[0]
                        aliases = extract_aliases(statement)
                        extract_selection(statement, aliases, schema)
                        extract_condition(statement)
                        extract_aggregation(statement, aliases, schema)
                        extract_nested_setoperation(statement)
                        others = extract_others(statement, aliases, schema)
                        # process_task reads these keys, so a missing key has to
                        # count as a parsing failure here as well.
                        for key in ('distinct', 'order by', 'limit'):
                            _ = others[key]
                    except Exception as e:
                        error_infos['parsing_sql'].append((x['sample_id'], s, str(e)))
                        error_infos['error_samples'].add(x['sample_id'])
                        break

                iterator.set_description_str(f'{task} | error samples {len(error_infos["error_samples"])} | empty hints {len(error_infos["empty_hint"])}')

    print(f'Parsing SQL errors: {len(error_infos["parsing_sql"])}')

    return error_infos

# process single task
def process_task(task, error_infos):
    """Score every usable sample of one task.

    Reads ``proj_path / 'experiments' / f'{task}.jsonl'`` (``proj_path`` is the
    notebook-level variable), skips the samples listed in
    ``error_infos['error_samples']``, and for each remaining sample computes the
    five partial scores from ``eval_all`` and the execution score.

    A sample whose gold SQL cannot be executed is dropped entirely. A sample whose
    predicted SQL cannot be executed gets an execution score of 0. A row is only
    appended once all of its values are known, so every list in the result has the
    same length.

    Args:
        task: Task name; also the stem of the JSONL file.
        error_infos: Mapping with an 'error_samples' collection of sample ids.

    Returns:
        dict of equally long lists under the keys 'sample_id', 'score', 's_sel',
        's_cond', 's_agg', 's_nest' and 's_oth'.
    """
    task_results = {
        'sample_id': [],
        'score': [],
        's_sel': [], 's_cond': [], 's_agg': [], 's_nest': [], 's_oth': [],
    }
    with open(proj_path / 'experiments' / f'{task}.jsonl', 'r') as f:
        # removeprefix drops the literal prefix; lstrip would strip a character set.
        iterator = tqdm(f, desc=task.removeprefix('sql_gen_hint_top'))
        for line in iterator:
            x = json.loads(line)
            if x['sample_id'] in error_infos['error_samples']:
                continue

            db_file = str(proj_path / 'data' / 'spider' / 'database' / x['db_id'] / f'{x["db_id"]}.sqlite')

            # parsing sql
            schema = Schema(get_schema(db_file))
            parsed_result = {}
            for s in ['gold', 'pred']:
                sql = x[f'{s}_sql']
                statement = sqlparse.parse(sql.strip())[0]
                aliases = extract_aliases(statement)
                selection = extract_selection(statement, aliases, schema)
                condition = extract_condition(statement)
                aggregation = extract_aggregation(statement, aliases, schema)
                nested = extract_nested_setoperation(statement)
                others = extract_others(statement, aliases, schema)

                parsed_result[s + '_selection'] = selection
                parsed_result[s + '_condition'] = condition
                parsed_result[s + '_aggregation'] = aggregation
                parsed_result[s + '_nested'] = nested
                parsed_result[s + '_others'] = {
                    'distinct': others['distinct'],
                    'order by': others['order by'],
                    'limit': others['limit']
                }

            # partial & complexity eval
            eval_res = eval_all(parsed_result, k=6)

            # Execution (prediction first, then gold, as before)
            database = SqliteDatabase(db_file)
            pred_failed = False
            try:
                pred_result = database.execute(x['pred_sql'], rt_pandas=False)
            except Exception:
                pred_failed = True

            try:
                gold_result = database.execute(x['gold_sql'], rt_pandas=False)
            except Exception:
                # No reference result: skip the sample before anything is appended,
                # otherwise 'score' ends up shorter than the other lists.
                continue

            if pred_failed:
                score = 0
            else:
                exists_orderby = check_if_exists_orderby(x['gold_sql'])
                score = int(result_eq(pred_result, gold_result, order_matters=exists_orderby))

            task_results['sample_id'].append(x['sample_id'])
            task_results['score'].append(score)
            task_results['s_sel'].append(eval_res['score']['selection'])
            task_results['s_cond'].append(eval_res['score']['condition'])
            task_results['s_agg'].append(eval_res['score']['aggregation'])
            task_results['s_nest'].append(eval_res['score']['nested'])
            task_results['s_oth'].append(eval_res['score']['others'])

    return task_results

def process_all_exps(proj_path, all_tasks):
    """Evaluate every task and write one CSV of per-sample scores per task.

    Parsing errors are collected once for all tasks via ``error_check``; each task
    is then scored with ``process_task`` and saved to
    ``proj_path / 'experiments' / 'bo_evals' / f'{task}.csv'``.
    """
    out_dir = proj_path / 'experiments' / 'bo_evals'
    error_infos = error_check(proj_path, all_tasks)
    for task in all_tasks:
        scores = pd.DataFrame(process_task(task, error_infos))
        scores.to_csv(out_dir / f'{task}.csv', index=False)

In [None]:
# remove trainset error samples when parsing
train_error_infos = p


In [69]:
typ = '_c'  # '_c'
iterator = ['low', 'mid', 'high'] if typ == '_c' else ['1', '2', '3+']
# Task names ordered by prompt type, then retrieval top-k, then level.
all_tasks = [
    f'sql_gen_hint_top{n_retrieval}_{level}_{typ2}'
    for typ2 in ['desc', 'descvt']
    for n_retrieval in [1, 3]
    for level in iterator
]

# process_all_exps(proj_path, all_tasks)

# Aggregating the results

In [47]:
import sqlparse
def additional_tables(proj_path, task, df: pd.DataFrame):
    """Export a per-sample comparison workbook for one task.

    Joins the scores in `df` with the task's generations (hint, gold SQL, predicted
    SQL) and with the predicted SQL stored in the train/dev ``*_eval_plus.csv``
    files (exported as 'previous_pred_sql'). All SQL strings are re-indented with
    sqlparse. The result is written to the sheet 'task' of
    ``proj_path / 'experiments' / 'bo_evals' / 'additional' / f'{task}.xlsx'``.
    """
    def reindent(sql):
        return sqlparse.format(sql, reindent=True)

    def load_previous(split):
        # Sample ids are namespaced with the split name, e.g. 'train.12'.
        frame = pd.read_csv(proj_path / 'experiments' / 'evals' / f'spider_{split}_eval_plus.csv')
        frame['sample_id'] = f'{split}.' + frame['sample_id'].astype(str)
        return frame

    previous = pd.concat([load_previous('train'), load_previous('dev')]).reset_index(drop=True)
    previous['previous_pred_sql'] = previous['pred_sql'].apply(reindent)

    with open(proj_path / 'experiments' / f'{task}.jsonl') as f:
        generations = pd.DataFrame([json.loads(line) for line in f])
    for sql_col in ('gold_sql', 'pred_sql'):
        generations[sql_col] = generations[sql_col].apply(reindent)

    score_part = df.loc[:, ['sample_id', 'score', 'cate_len_tbls', 'cate_gold_c', 's_sel', 's_cond', 's_agg', 's_nest', 's_oth']]
    generation_part = generations.loc[:, ['sample_id', 'db_id', 'hint', 'gold_sql', 'pred_sql']]
    previous_part = previous.loc[:, ['sample_id', 'previous_pred_sql']]

    # Left joins keep exactly the rows of `df`, in their original order.
    report = score_part.merge(generation_part, on='sample_id', how='left')
    report = report.merge(previous_part, on='sample_id', how='left')

    out_path = proj_path / 'experiments' / 'bo_evals' / 'additional' / f'{task}.xlsx'
    with pd.ExcelWriter(out_path) as writer:
        report.to_excel(writer, sheet_name='task', index=False)

In [49]:
# real eval
typ = '_t' # '_t', '_c'
iterator = ['low', 'mid', 'high'] if typ == '_c' else ['1', '2', '3+']
all_tasks = []
for typ2 in ['desc', 'descvt']:
    for n_retrieval in [1, 3]:
        for level in iterator:
            all_tasks.append(f'sql_gen_hint_top{n_retrieval}_{level}_{typ2}')

col = 'cate_gold_c' if typ == '_c' else 'cate_len_tbls'
metric_cols = ['score', 's_sel', 's_cond', 's_agg', 's_nest', 's_oth']
partial_cols = metric_cols[1:]  # the five 's_*' partial-match columns

results = {
    'bo_topk': [], 'bo_level': [], 'bo_desc_vt': [],
    'count': [], 'ex_acc': [], 'pm_sel': [], 'pm_cond': [], 'pm_agg': [], 'pm_nest': [], 'pm_oth': [],
}
for lvl in iterator:
    results[f'ex_acc_{lvl}'] = []
    results[f'count_{lvl}'] = []
    for part in ['sel', 'cond', 'agg', 'nest', 'oth']:
        results[f'pm_{part}_{lvl}'] = []

df_test = pd.read_csv(proj_path / 'data' / 'split_in_domain' / f'bo{typ}_eval.csv')
baseline = df_test.groupby(col)[metric_cols].mean() * 100

for task in all_tasks:
    print(task)
    # Task names look like 'sql_gen_hint_top<k>_<level>_<prompt type>'. Parse the
    # name once; removeprefix drops the literal prefix, whereas lstrip strips a
    # character set and only worked because a digit follows the prefix.
    topk, bo_level, prompt_type = task.removeprefix('sql_gen_hint_top').split('_')[:3]
    topk = int(topk)
    results['bo_topk'].append(topk)
    results['bo_level'].append(bo_level)
    results['bo_desc_vt'].append(prompt_type)

    df = pd.read_csv(proj_path / 'experiments' / 'bo_evals' / f'{task}.csv')
    df = pd.merge(df, df_test.loc[:, ['sample_id', 'cate_len_tbls', 'cate_gold_c']], on='sample_id', how='left')

    results['count'].append(df.shape[0])
    # NOTE(review): the overall 'ex_acc' is an absolute mean, while the per-level
    # 'ex_acc_<level>' values below are differences to `baseline` -- confirm intended.
    results['ex_acc'].append(df['score'].mean()*100)
    for metric in partial_cols:
        # 's_sel' -> 'pm_sel': difference of the task mean to the df_test mean, in percent.
        results[f'pm_{metric[2:]}'].append((df[metric].mean() - df_test[metric].mean())*100)

    g_score = df.groupby(col)[metric_cols].mean() * 100 - baseline
    for lvl in iterator:
        results[f'ex_acc_{lvl}'].append(g_score.loc[lvl, 'score'])
        results[f'count_{lvl}'].append(df[df[col] == lvl].shape[0])
        for metric in partial_cols:
            results[f'pm_{metric[2:]}_{lvl}'].append(g_score.loc[lvl, metric])

    # Detailed per-sample workbooks are only exported for the top-3 'descvt' runs.
    if prompt_type == 'descvt' and topk == 3:
        additional_tables(proj_path, task, df)

sql_gen_hint_top1_1_desc
sql_gen_hint_top1_2_desc
sql_gen_hint_top1_3+_desc
sql_gen_hint_top3_1_desc
sql_gen_hint_top3_2_desc
sql_gen_hint_top3_3+_desc
sql_gen_hint_top1_1_descvt
sql_gen_hint_top1_2_descvt
sql_gen_hint_top1_3+_descvt
sql_gen_hint_top3_1_descvt
sql_gen_hint_top3_2_descvt
sql_gen_hint_top3_3+_descvt


In [81]:
# Human-readable prompt-type labels and capitalised level names for the report.
desc_vt = {'desc': 'BA', 'descvt': 'BA + VT'}
df = pd.DataFrame(results).assign(
    bo_desc_vt=lambda frame: frame['bo_desc_vt'].map(desc_vt),
    bo_level=lambda frame: frame['bo_level'].str.capitalize(),
)

# Column groups used by the report sheets, derived from the level names of `typ`.
level_names = ['low', 'mid', 'high'] if typ == '_c' else ['1', '2', '3+']
idx_cols = ['bo_topk','bo_desc_vt', 'bo_level']
count_cols = ['count'] + [f'count_{name}' for name in level_names]
r1 = [f'ex_acc_{name}' for name in level_names] + ['ex_acc']
r2 = ['pm_sel', 'pm_cond', 'pm_agg', 'pm_nest', 'pm_oth']
r3 = [f'pm_sel_{name}' for name in level_names]
r4 = [f'pm_cond_{name}' for name in level_names]
r5 = [f'pm_agg_{name}' for name in level_names]
r6 = [f'pm_nest_{name}' for name in level_names]
r7 = [f'pm_oth_{name}' for name in level_names]

In [82]:
# Header names shared by every sheet; the level column is titled after `typ`.
base_names = {
    'bo_topk': 'Top-K', 'bo_desc_vt': 'Prompt Type',
    'bo_level': 'Complexity Lv.' if typ == '_c' else 'Table Num.',
}
report_path = proj_path / 'experiments' / 'reports' / f'bo_eval{typ}.xlsx'

with pd.ExcelWriter(report_path) as writer:
    # The 'count' sheet is written with the frame index; the other sheets omit it.
    df[idx_cols + count_cols].to_excel(writer, sheet_name='count')

    # Sheet 'ex_acc': per-level and overall execution-accuracy columns.
    ex_acc_names = {**base_names, 'ex_acc': 'Overall'}
    ex_acc_names.update({f'ex_acc_{lvl}': lvl.capitalize() for lvl in iterator})
    ex_acc_sheet = df[idx_cols + r1].round(2).rename(columns=ex_acc_names)
    ex_acc_sheet.to_excel(writer, sheet_name='ex_acc', index=False)

    # Sheet 'pm': the five overall partial-match columns.
    pm_names = {
        **base_names,
        'pm_sel': 'Selection',
        'pm_cond': 'Condition',
        'pm_agg': 'Aggregation',
        'pm_nest': 'Nested',
        'pm_oth': 'Others',
    }
    pm_sheet = df[idx_cols + r2].round(2).rename(columns=pm_names)
    pm_sheet.to_excel(writer, sheet_name='pm', index=False)

    # Sheet 'pm_detail': partial-match columns broken down by level.
    detail_names = dict(base_names)
    for lvl in iterator:
        for part in ['sel', 'cond', 'agg', 'nest', 'oth']:
            detail_names[f'pm_{part}_{lvl}'] = f'{part.capitalize()} {lvl.capitalize()}'
    detail_sheet = df[idx_cols + r3 + r4 + r5 + r6 + r7].round(2).rename(columns=detail_names)
    detail_sheet.to_excel(writer, sheet_name='pm_detail', index=False)

---

---

In [66]:
# Stack the train and dev evaluation files, prefixing each sample id with its
# split name, and store the combined table.
eval_dir = proj_path / 'experiments' / 'evals'
split_frames = []
for split in ('train', 'dev'):
    split_df = pd.read_csv(eval_dir / f'spider_{split}_eval_plus.csv')
    split_df['sample_id'] = f'{split}.' + split_df['sample_id'].astype(str)
    split_frames.append(split_df)
df = pd.concat(split_frames).reset_index(drop=True)

df.to_csv(proj_path / 'experiments' / 'bo_evals' / 'additional' / 'all.csv', index=False)

In [63]:
# Print id, question and re-indented gold SQL of two hand-picked samples,
# each followed by a blank line.
selected_ids = ['train.388', 'train.387']
for _, row in df[df['sample_id'].isin(selected_ids)].iterrows():
    print(
        row['sample_id'],
        row['question'],
        sqlparse.format(row['gold_sql'], reindent=True),
        '',
        sep='\n',
    )

train.387
Show origins of all flights with destination Honolulu.
SELECT origin
FROM Flight
WHERE destination = 'Honolulu'

train.388
What are the origins of all flights that are headed to Honolulu?
SELECT origin
FROM Flight
WHERE destination = 'Honolulu'



In [60]:
# Show id, question, gold SQL and predicted SQL for the rows labelled 370 through 380.
df.loc[370:380, ['sample_id', 'question', 'gold_sql', 'pred_sql']]

Unnamed: 0,sample_id,question,gold_sql,pred_sql
370,train.380,What is the id and salary of the employee name...,"SELECT eid , salary FROM Employee WHERE name ...","SELECT eid, salary FROM employee WHERE name = ..."
371,train.381,How many flights do we have?,SELECT count(*) FROM Flight,SELECT COUNT(*) AS total_flights FROM flight;
372,train.382,What is the number of flights?,SELECT count(*) FROM Flight,SELECT COUNT(*) AS number_of_flights FROM flight;
373,train.383,"Show flight number, origin, destination of all...","SELECT flno , origin , destination FROM Flig...","SELECT flno, origin, destination FROM flight O..."
374,train.384,"What is the flight number, origin, and destina...","SELECT flno , origin , destination FROM Flig...","SELECT flno, origin, destination FROM flight O..."
375,train.385,Show all flight number from Los Angeles.,SELECT flno FROM Flight WHERE origin = 'Los ...,SELECT flno FROM flight WHERE origin = 'Los An...
376,train.386,What are the numbers of all flights coming fro...,SELECT flno FROM Flight WHERE origin = 'Los ...,SELECT flno FROM flight WHERE origin = 'Los An...
377,train.387,Show origins of all flights with destination H...,SELECT origin FROM Flight WHERE destination =...,SELECT DISTINCT origin FROM flight WHERE desti...
378,train.388,What are the origins of all flights that are h...,SELECT origin FROM Flight WHERE destination =...,SELECT DISTINCT origin FROM flight WHERE desti...
379,train.389,Show me the departure date and arrival date fo...,"SELECT departure_date , arrival_date FROM Fli...","SELECT departure_date, arrival_date FROM fligh..."
