### Importing California data into a database as an example of longitudinal single-child data 

In [5]:
from config_paths import *
import sys
sys.path.append(str(SRC_DIR))

import json
import sqlite3
import os
from pathlib import Path

from fuzzy_sql.load import prep_data_for_db, get_vars_to_index, make_table


# Short dataset key; used below to build the data sub-directory paths
# and the name of the SQLite database file.
DATASET_NAME = 'cal'

In [6]:
# Resolve the input folders for this dataset. All three live under
# DATA_DIR/<DATASET_NAME>/ and differ only in their trailing sub-path.
metadata_dir, real_dir, syn_dir = (
    os.path.join(DATA_DIR, DATASET_NAME, sub_path)
    for sub_path in (
        'processed/metadata',
        'processed/real',
        'processed/synthetic',
    )
)

# The SQLite file is named after the dataset and stored under DB_DIR.
db_path = os.path.join(DB_DIR, f'{DATASET_NAME}.db')

In [7]:
# Input file names. The three lists are positionally aligned: entry i of each
# list describes the same table (real CSV, its metadata JSON, synthetic CSV).
real_csvs = [
    "b_sample.csv",
    "l_sample.csv",
]
meta_jsons = [
    "b_sample.json",
    "l_sample.json",
]
syn_csvs = [
    "b_sample_syn_01.csv",
    "l_sample_syn_01.csv",
]


### Prepare the real and synthetic data and import them into the database with indexing

In [8]:
# Load each real table and its synthetic counterpart into the SQLite database
# at db_path, one table per CSV (table name = CSV file stem).
#
# The three file lists are walked in lockstep, so they must line up one-to-one.
# zip() would otherwise silently stop at the shortest list and skip tables.
if not (len(real_csvs) == len(meta_jsons) == len(syn_csvs)):
    raise ValueError(
        "real_csvs, meta_jsons and syn_csvs must have the same length"
    )

conn = sqlite3.connect(db_path)
try:
    # `with conn` only scopes a transaction (commit on success, rollback on
    # error). It does NOT close the connection, hence the explicit finally.
    with conn:
        for real_csv,meta_json,syn_csv in zip(real_csvs,meta_jsons,syn_csvs):
            #Import real
            # Note: prep_data_for_db can also be used to quickly generate a
            # metadata template (its second return value is discarded here),
            # but this is not shown here.
            data,_=prep_data_for_db(os.path.join(real_dir,real_csv))
            with open(os.path.join(metadata_dir, meta_json), 'r') as f:
                metadata=json.load(f)
            # Variables to index are chosen from the real data and its metadata.
            candidate_db_idx=get_vars_to_index(metadata,data)
            table_name=Path(real_csv).stem
            make_table(table_name, data, conn,indx_vars=candidate_db_idx)

            #Import syn
            data,_=prep_data_for_db(os.path.join(syn_dir,syn_csv))
            table_name=Path(syn_csv).stem
            # The index variables derived from the real table are reused for
            # the synthetic one -- presumably both share the same columns;
            # TODO confirm.
            make_table(table_name, data, conn,indx_vars=candidate_db_idx)
finally:
    # Release the database handle even if an import step fails.
    conn.close()

Table b_sample already exists in the database
Table b_sample_syn_01 already exists in the database
Table l_sample already exists in the database
Table l_sample_syn_01 already exists in the database
