In [1]:
import pprint
from dbchat.evaluation.utils import load_evaluation_csv_data
from dbchat import ROOT_DIR
# Location of the example evaluation queries, resolved relative to ROOT_DIR.
fpath = ROOT_DIR.parent.parent / "examples/evaluation/queries.csv"

# Stream the CSV in chunks of two records and pretty-print each chunk as it arrives.
eval_data = load_evaluation_csv_data(fpath, stream=True, chunksize=2)
for chunk in eval_data:
    pprint.pprint(chunk)

[{'id': '1',
  'note': 'chooses the correct table.',
  'response': 'We have made a total of $75.24 in Berlin.',
  'tables': 'invoices',
  'user_query': 'How much money have we made in Berlin?'},
 {'id': '2',
  'note': 'deals with NULL, verifies with user that BillingState is not unique '
          'to the entire table',
  'response': 'The state that made the most money is California (CA).',
  'tables': 'invoices',
  'user_query': 'Which state made the most money?'}]
[{'id': '3',
  'note': 'semantic meaning of "Type of song" is used to select genre table',
  'response': 'The type of music that has the longest song is "Occupation / '
              'Precipice".',
  'tables': 'tracks,genres',
  'user_query': 'Which type of music has the longest song?'},
 {'id': '4',
  'note': 'longer chain of related entities',
  'response': 'The genre of music that has the longest song is "TV Shows".',
  'tables': 'employees,customers,invoices,invoice_items',
  'user_query': 'Which was the most expensive 

In [2]:
import yaml
# Load the YAML run configuration that later cells pass to the agent and to the
# evaluation helpers, then echo it back so the notebook records what was used.
config_path = ROOT_DIR.parent / "tests/data/inputs/cfg_3.yml"
# Pass the encoding explicitly: without it, open() falls back to the platform's
# locale encoding, which is not guaranteed to be UTF-8.
with open(config_path, encoding="utf-8") as config_file:
    config = yaml.safe_load(config_file)
print(yaml.dump(config))

approach: sql_engine_w_reranking
database:
  metadata:
    document_id_like: '%-2'
    metadata_path: sqlite:///data/chinook.db
    table_name: table_descriptions
  path: sqlite:///data/chinook.db
index:
  class: ollama
  name: llama2reranker
  reranking:
    config_object: ReRankerLLMConfig
    reranker_kwargs:
      top_n: 3
  retriever_kwargs:
    similarity_top_k: 4
llm:
  class: ollama
  name: llama2



In [3]:
from dbchat.sql_agent import create_agent

# Build the query engine from the configuration loaded earlier in the notebook.
query_engine = create_agent(config)

# Load the evaluation rows without streaming and take the first user question.
eval_data = load_evaluation_csv_data(fpath, stream=False)
input_query = eval_data[0]['user_query']
response = query_engine.query(input_query)

# Report the question, the answer text, and the SQL recorded in the response
# metadata, one item per line.
report_lines = [
    f"{input_query=}",
    f"{response.response=}",
    f"{response.metadata['sql_query']}",
]
print("\n".join(report_lines))

# NOTE(review): `_get_tables` has a leading underscore, i.e. it looks like a
# private retriever method -- confirm there is no public equivalent to call.
retrieved_tables = query_engine.sql_retriever._get_tables(input_query)
print(f"{retrieved_tables=}")


INFO:numexpr.utils:Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:numexpr.utils:NumExpr defaulting to 8 threads.
Debugging Query: SELECT TABLE_NAME, DESCRIPTION FROM table_descriptions WHERE TABLE_NAME IN ('albums','artists','customers','employees','genres','invoice_items','invoices','media_types','playlist_track','playlists','artists'0,'artists'1) AND DOCUMENT_ID LIKE '%-2'
INFO:llama_index.indices.struct_store.sql_retriever:> Table desc str: Table 'tracks' has columns: TrackId (INTEGER), Name (NVARCHAR(200)), AlbumId (INTEGER), MediaTypeId (INTEGER), GenreId (INTEGER), Composer (NVARCHAR(220)), Milliseconds (INTEGER), Bytes (INTEGER), UnitPrice (NUMERIC(10, 2)), and foreign keys: ['MediaTypeId'] -> media_types.['MediaTypeId'], ['GenreId'] -> genres.['GenreId'], ['AlbumId'] -> albums.['AlbumId']. The table description is: Tracks on a album, and details like price. Does not store a reference to playlist.

Table 'invoice_items' has 

## Using the evaluate functions

In [4]:
from dbchat.evaluation.utils import save_test_results
from dbchat.evaluation.evaluate import evaluate_table_name_retrieval, evaluate_synthetic_judge

# Run the synthetic-judge evaluation over the example queries using the
# configuration file at `config_path`, then show the raw results.
queries_csv = ROOT_DIR.parent.parent / "examples/evaluation/queries.csv"
results = evaluate_synthetic_judge(
    test_data_path=queries_csv,
    config_path=config_path,
)
print(results)

Debugging Query: SELECT TABLE_NAME, DESCRIPTION FROM table_descriptions WHERE TABLE_NAME IN ('albums','artists','customers','employees','genres','invoice_items','invoices','media_types','playlist_track','playlists','artists'0,'artists'1) AND DOCUMENT_ID LIKE '%-2'
[{'test_name': 'evaluate_synthetic_judge', 'config': {'approach': 'sql_engine_w_reranking', 'database': {'path': 'sqlite:///data/chinook.db', 'metadata': {'metadata_path': 'sqlite:///data/chinook.db', 'table_name': 'table_descriptions', 'document_id_like': '%-2'}}, 'index': {'name': 'llama2reranker', 'class': 'ollama', 'retriever_kwargs': {'similarity_top_k': 4}, 'reranking': {'config_object': 'ReRankerLLMConfig', 'reranker_kwargs': {'top_n': 3}}}, 'llm': {'name': 'llama2', 'class': 'ollama'}}, 'input_query': 'How much money have we made in Berlin?', 'expected_response': 'We have made a total of $75.24 in Berlin.', 'actual_response': {'response': None, 'tables': ['tracks', 'invoice_items', 'playlist_track']}, 'synthesized_jud

In [None]:
# Persist the current `results` under <ROOT_DIR.parent>/test_results/results.json.
results_file = ROOT_DIR.parent / "test_results" / "results.json"
save_test_results(results, test_results_path=results_file)

### Evaluate performance on retrieving the correct tables

In [None]:
# Run the table-name retrieval evaluation over the example queries using the
# configuration file at `config_path`, then show the raw results.
queries_csv = ROOT_DIR.parent.parent / "examples/evaluation/queries.csv"
results = evaluate_table_name_retrieval(
    test_data_path=queries_csv,
    config_path=config_path,
)
print(results)

In [None]:
# Persist the current `results` under <ROOT_DIR.parent>/test_results/results.json.
# NOTE(review): an earlier cell in this notebook saves to this same path --
# confirm whether save_test_results appends to or overwrites an existing file.
results_file = ROOT_DIR.parent / "test_results" / "results.json"
save_test_results(results, test_results_path=results_file)