### An example of generating *Aggregate-Filter* queries for cross-sectional data
######  Before executing this notebook, please make sure that the data has already been imported into the database.

In [None]:
! pip install --upgrade pip
! pip install fuzzy-sql

In [None]:
import json
import os
from pathlib import Path

from fuzzy_sql.generate import gen_aggfltr_queries
from fuzzy_sql.report import Report

# Dataset identifier; the cells below derive the metadata folder, the database
# file name and the report file names from it.
DATASET_NAME = "sdgd"

In [None]:
# Working folders, resolved relative to the directory the notebook is run from.
DATA_DIR, DB_DIR = (
    os.path.join(os.getcwd(), folder) for folder in ("data", "databases")
)

# Per-dataset locations: the database file and the metadata folder.
db_path = os.path.join(DB_DIR, f"{DATASET_NAME}.db")
metadata_dir = os.path.join(DATA_DIR, DATASET_NAME, "metadata")

### GENERATING RANDOM QUERIES 

In [None]:
# Create lists with table names. Table names shall be identical to the names initially created in the database.
real_tbl_lst = ['C1']
syn_tbl_lst = ['C1_syn_default_1']

# Read metadata from the provided json files into a list of dictionaries.
# Note 1: Both real and synthetic data should have the same metadata file.
# Note 2: Each input table in real_tbl_lst above shall have its own metadata file.
# Note 3: The json file name shall match the corresponding table name in real_tbl_lst.
metadata_lst = []
for tbl_name in real_tbl_lst:
    # Decode explicitly as UTF-8 so the files are read the same way on every
    # platform instead of depending on the locale's default encoding.
    with open(os.path.join(metadata_dir, tbl_name + '.json'), 'r', encoding='utf-8') as f:
        metadata_lst.append(json.load(f))

In [None]:
# Generate the random queries from the database, the real/synthetic table
# names and the loaded metadata.
# NOTE(review): the leading 10 is presumably the number of queries to
# generate — confirm against the fuzzy_sql API.
rnd_queries = gen_aggfltr_queries(
    10,
    db_path,
    real_tbl_lst,
    metadata_lst,
    syn_tbl_lst,
)

### REPORTING 

In [None]:
# Build the reporter from the real table names and the generated queries.
rprtr = Report(real_tbl_lst, rnd_queries)

# Every output file name is prefixed with the dataset name.
rprtr.print_html_mltpl(f"{DATASET_NAME}.html")
rprtr.plot_violin("Hellinger", f"{DATASET_NAME}_hlngr.png")
rprtr.plot_violin("Euclidean", f"{DATASET_NAME}_ecldn.png")