### An example for longitudinal data with a single child

In [None]:
#! pip install --upgrade pip
#! pip install fuzzy_sql-1.1.1b0-py3-none-any.whl
#%matplotlib inline

In [None]:
# NOTE: order matters here. SRC_DIR is expected to come from config_paths and
# must be on sys.path before fuzzy_sql can be imported.
from config_paths import *
import sys
sys.path.append(str(SRC_DIR))

from fuzzy_sql.fuzzy_sql import *
import json
import os
# sqlite3 is used directly when opening the database connection; import it
# explicitly instead of relying on a star import to leak it into scope.
import sqlite3
from pathlib import Path

# Short dataset identifier: it selects the data sub-folder and names the
# SQLite database file built further down.
DATASET_NAME = "cal"


# Select matplotlib's non-interactive Agg backend before any figure is drawn.
import matplotlib
matplotlib.use("Agg")

### PREPROCESSING AND IMPORTING DATA INTO DATABASE
##### (This is typically done only once)

In [None]:
# Working directories for this dataset, all rooted at DATA_DIR/<DATASET_NAME>.
dataset_root = os.path.join(DATA_DIR, DATASET_NAME)
metadata_dir = os.path.join(dataset_root, 'processed/metadata')
real_dir = os.path.join(dataset_root, 'processed/real')
syn_dir = os.path.join(dataset_root, 'processed/synthetic')

# SQLite database file that will hold the imported tables.
db_path = os.path.join(DB_DIR, f'{DATASET_NAME}.db')


# Input file names. The three lists are parallel: position i of each list
# refers to the same table (real CSV, its metadata JSON, its synthetic CSV).
real_csvs = [
    "b_sample.csv",
    "l_sample.csv",
]
meta_jsons = [
    'b_sample.json',
    'l_sample.json',
]
syn_csvs = [
    'b_sample_syn_01.csv',
    'l_sample_syn_01.csv',
]

In [None]:
# Create the SQLite database file, or connect to it if it already exists.
conn = sqlite3.connect(db_path)

# try/finally guarantees the connection is closed even when an import step
# raises (e.g. a missing CSV or malformed metadata), so a failed run does not
# leave an open handle on the database file for the rest of the kernel session.
try:
    for real_csv, meta_json, syn_csv in zip(real_csvs, meta_jsons, syn_csvs):
        # --- Import real table ---
        # Note: prep_data_for_db can also be used to quickly generate a
        # metadata template, but this is not shown here.
        data, _ = prep_data_for_db(os.path.join(real_dir, real_csv))
        with open(os.path.join(metadata_dir, meta_json), 'r') as f:
            metadata = json.load(f)
        # Index candidates are computed from the real table and its metadata,
        # then reused unchanged for the synthetic table below.
        candidate_db_idx = get_vars_to_index(metadata, data)
        table_name = Path(real_csv).stem  # table is named after the CSV file stem
        make_table(table_name, data, conn, indx_vars=candidate_db_idx)

        # --- Import synthetic table ---
        data, _ = prep_data_for_db(os.path.join(syn_dir, syn_csv))
        table_name = Path(syn_csv).stem
        make_table(table_name, data, conn, indx_vars=candidate_db_idx)
finally:
    conn.close()
    

### GENERATING RANDOM QUERIES 

In [None]:
# Table names are the CSV file stems, i.e. exactly the names the tables were
# given when they were created in the database.
real_tbl_lst, syn_tbl_lst = (
    [Path(csv_name).stem for csv_name in csv_names]
    for csv_names in (real_csvs, syn_csvs)
)


In [None]:
# Load one metadata dictionary per real table, in the same order as real_tbl_lst.
# Note 1: Real and synthetic data share the same metadata file.
# Note 2: Every table listed in real_tbl_lst must have its own metadata file.
# Note 3: Each json file must be named after its table in real_tbl_lst (<table name>.json).
metadata_lst = []
for tbl_name in real_tbl_lst:
    json_path = os.path.join(metadata_dir, tbl_name+'.json')
    with open(json_path, 'r') as f:
        tbl_metadata = json.load(f)
    metadata_lst.append(tbl_metadata)

In [None]:
# Generate random queries over the real tables and their synthetic counterparts.
rnd_queries = gen_queries(
    10,  # presumably the number of random queries to generate; confirm against gen_queries
    db_path,
    real_tbl_lst,
    metadata_lst,
    syn_tbl_lst,
)


In [None]:
# Build the report from the generated queries and emit the HTML report plus
# one violin plot per distance metric.
# Output names are derived from DATASET_NAME (as the database file name
# already is) instead of a hard-coded 'cal', so switching datasets does not
# overwrite another dataset's report. For DATASET_NAME == 'cal' the file names
# are unchanged.
rprtr = QryRprt(real_tbl_lst, rnd_queries)
rprtr.print_html_mltpl(f'{DATASET_NAME}.html')
rprtr.plot_violin('Hellinger', f'{DATASET_NAME}_hlngr.png')
rprtr.plot_violin('Euclidean', f'{DATASET_NAME}_ecldn.png')