### Generate a report for Hellinger Distance between real and synthetic random queries

In [1]:
from pathlib import Path
import os
import sys 
import pandas as pd
from csv import writer  
import numpy as np
import weasyprint

sys.path.append('/home/samer/projects/fuzzy_sql/src') #This will enable reading the modules
from fuzzy_sql.fuzzy_sql import *


In [2]:
#set paths
root_dir=Path('/home/samer/projects/fuzzy_sql')
real_dir=os.path.join(root_dir,'data/tabular/ready/real')
meta_dir=os.path.join(root_dir,'data/tabular/ready/metadata')
syn_dir=os.path.join(root_dir,'data/tabular/ready/synthetic')

In [3]:
#extract real and synthetic data names
real_names=extract_fnames(real_dir)
real_names.sort()
names_dict=find_syn_fnames(syn_dir, real_names)

Extracted the names of 40 real datasets
Extracted the names of all available synthetic datasets corresponding to 40 real datasets


In [4]:
def summarize_queries(names_dict,no_of_queries):
    syn_dict={} #dictionary of Hellinger and Euclidean Stats per each synthetic trial
    syn_dict['real_name']=[]
    syn_dict['syn_name']=[]
    syn_dict['no_queries']=[]
    syn_dict['hlngr_mean']=[]
    syn_dict['hlngr_median']=[]
    syn_dict['hlngr_stddev']=[]
    syn_dict['ecldn_mean']=[]
    syn_dict['ecldn_median']=[]
    syn_dict['ecldn_stddev']=[]

    for real_name in names_dict:
        real_path=os.path.join(real_dir, real_name+'.csv')
        meta_path=os.path.join(meta_dir, real_name+'.json') #If no corresponding metadata is available, skip tuple
        if not os.path.exists(meta_path): # skip if there is no metadata defined for the dataset
                continue
        if len(names_dict[real_name])==0: #skip if  no synthetic data is available 
            continue
        
        for syn_name in names_dict[real_name]:
            syn_dict['real_name'].append(real_name)
            syn_dict['syn_name'].append(syn_name)
            syn_path=os.path.join(syn_dir, syn_name+'.csv')
            scored_queries=fuzz_tabular(no_of_queries,'twin_aggfltr', real_path, meta_path,syn_path,run_folder='../.runs', printme=False)
            syn_dict['no_queries'].append(no_of_queries)
            syn_dict['hlngr_mean'].append(np.nanmean(scored_queries['hlngr_dist']))
            syn_dict['hlngr_median'].append(np.nanmedian(scored_queries['hlngr_dist']))
            syn_dict['hlngr_stddev'].append(np.nanstd(scored_queries['hlngr_dist']))
            syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
            syn_dict['ecldn_median'].append(np.nanmedian(scored_queries['ecldn_dist']))
            syn_dict['ecldn_stddev'].append(np.nanstd(scored_queries['ecldn_dist']))
            
    return pd.DataFrame(syn_dict)

   

In [5]:
#Generate most abstrcated summary per real dataset
#test_dict={k: names_dict[k] for k in list(names_dict)[:2]}
syn_summary=summarize_queries(names_dict,10)
counts=syn_summary.groupby('real_name').count()['hlngr_mean'].values
real_summary=syn_summary.groupby(['real_name']).mean()
real_summary.insert(1,'no_syn',counts)


Table C1 already exists in the database
Table C1_syn_default_19 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 
Table C1 already exists in the database
Table C1_syn_default_3 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate 

  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Table C19 already exists in the database
Table C19_syn_39 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 
Table C2 already exists in the database
Table C2_syn_06 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 
Tabl

  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 


  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Table C22 already exists in the database
Table C22_syn_06 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 


  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Table C23 already exists in the database
Table C23_syn_06 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 
Table C23 already exists in the database
Table C23_syn_39 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 
Ta

  syn_dict['hlngr_mean'].append(np.nanmean(scored_queries['hlngr_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))


Table C25_syn_39 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 


  syn_dict['hlngr_mean'].append(np.nanmean(scored_queries['hlngr_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))


Table C25 already exists in the database
Table C25_syn_06 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 
Table C26 already exists in the database
Table C26_syn_06 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 
Ta

  syn_dict['hlngr_mean'].append(np.nanmean(scored_queries['hlngr_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))


Table C27 already exists in the database
Table C27_syn_06 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 
Table C28 already exists in the database
Table C28_syn_39 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 
Ta

  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Table C6 already exists in the database
Table C6_syn_default_17 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 
Table C6 already exists in the database
Table C6_syn_default_7 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate 

  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Table C6 already exists in the database
Table C6_syn_default_12 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 
Table C6 already exists in the database
Table C6_syn_default_14 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate

  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Table C6 already exists in the database
Table C6_syn_39 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 


  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Table C6 already exists in the database
Table C6_syn_default_6 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 
Table C6 already exists in the database
Table C6_syn_default_3 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Q

  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Table C6 already exists in the database
Table C6_syn_default_20 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 
Table C6 already exists in the database
Table C6_syn_default_18 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate

  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Table C6 already exists in the database
Table C6_syn_default_1 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 
Table C6 already exists in the database
Table C6_syn_default_9 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Q

  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Table C6 already exists in the database
Table C6_syn_default_2 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 


  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Table C6 already exists in the database
Table C6_syn_default_11 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 
Table C6 already exists in the database
Table C6_syn_default_15 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate

  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 
Table C6 already exists in the database
Table C6_syn_default_5 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 


  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Table C6 already exists in the database
Table C6_syn_06 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 
Table C6 already exists in the database
Table C6_syn_default_13 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9

  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Table C6 already exists in the database
Table C6_syn_default_8 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 


  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Table C6 already exists in the database
Table C6_syn_default_10 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 
Table C7 already exists in the database
Table C7_syn_default_13 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate

  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Table n0147OS already exists in the database
Table n0147OS_syn_06 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 


  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,


Table trial1 already exists in the database
Table trial1_syn_06 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 


  syn_dict['hlngr_mean'].append(np.nanmean(scored_queries['hlngr_dist']))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  syn_dict['ecldn_mean'].append(np.nanmean(scored_queries['ecldn_dist']))


Table trial2 already exists in the database
Table trial2_syn_06 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate Query 9 
Table trial2 already exists in the database
Table trial2_syn_39 already exists in the database
Generated Conditioned Aggregate Query 0 
Generated Conditioned Aggregate Query 1 
Generated Conditioned Aggregate Query 2 
Generated Conditioned Aggregate Query 3 
Generated Conditioned Aggregate Query 4 
Generated Conditioned Aggregate Query 5 
Generated Conditioned Aggregate Query 6 
Generated Conditioned Aggregate Query 7 
Generated Conditioned Aggregate Query 8 
Generated Conditioned Aggregate

In [6]:
# Generate Detailed Report in HTML
with open('../reports/SEP202022/frame_syn.html','r') as f:
    html_string=f.read()
with open('../reports/SEP202022/report_syn.html', 'w') as f:
    f.write(html_string.format(pandas_table=syn_summary.to_html(classes='mystyle')))


# Generate Report in HTML
with open('../reports/SEP202022/frame_real.html','r') as f:
    html_string=f.read()
with open('../reports/SEP202022/report_real.html', 'w') as f:
    f.write(html_string.format(pandas_table=real_summary.to_html(classes='mystyle')))

In [7]:
#Convert Detailed to pdf
# with open('../reports/SEP202022/hlngr_queries_syn_report.html','r') as f:
#     html_string=f.read()
# #html_string=html_string.replace('\n',"")


pdf=weasyprint.HTML('../reports/SEP202022/report_syn.html').write_pdf()
with open('../reports/SEP202022/report_syn.pdf','w+b') as f:
    f.write(pdf) 



#Convert summary to pdf
# with open('../reports/SEP202022/hlngr_queries_report.html','r') as f:
#     html_string=f.read()
# #html_string=html_string.replace('\n',"")

import weasyprint
pdf=weasyprint.HTML('../reports/SEP202022/report_real.html').write_pdf()
with open('../reports/SEP202022/report_real.pdf','w+b') as f:
    f.write(pdf) 