In [1]:
import os

import noteql

In [2]:
# Open the noteql session used by every later cell.
# The connection string can be supplied through the NOTEQL_DBURI environment
# variable so credentials do not have to be edited into the notebook; when the
# variable is unset the local development database is used, as before.
session = noteql.Session(
    'recursion',
    dburi=os.environ.get('NOTEQL_DBURI', 'postgresql://noteql:noteql@localhost/noteql'),
)

In [3]:
# Import the release data from the JSON file into the database.
# NOTE(review): presumably `path_to_list` names the key holding the list of
# releases, and each list item lands in column `releases` of table `import`,
# replacing any earlier load because of `overwrite=True` — confirm against
# the noteql documentation.
session.load_json(
    'full-1.1.json',
    path_to_list='releases',
    table_name='import',
    field_name='releases',
    overwrite=True,
)

Total rows loaded 103


In [4]:
# Rebuild the `releases` table from the imported rows: each row keeps the
# release JSON (column `releases`) next to its `id` extracted as text
# (column `release_id`). Dropping the table first makes the cell re-runnable.
create_releases_sql = """
drop table if exists releases;
select releases->> 'id' release_id, releases into releases from import
"""
session.run_sql(create_releases_sql)

'Success'

In [5]:
# Rebuild the `flattened` table: one row per node reached while walking each
# release's JSON, keyed by release_id and a '/'-joined path.
#
# * Seed term: jsonb_each over every release yields its top-level key/value
#   pairs; the key becomes the initial path.
# * Recursive term: for each row produced so far, an object value is expanded
#   with jsonb_each (child path = parent path || '/' || key), and an array whose
#   first element is an object is expanded with jsonb_array_elements (elements
#   keep the parent's path unchanged). The inner CASE guards hand an empty
#   '{}' / '[]' to whichever function does not apply to the value.
# * Final SELECT: objects and arrays-of-objects are written with a NULL value,
#   so only scalars and the remaining arrays keep their JSON value.
#
# NOTE(review): both set-returning functions share one select list, and the
# outer CASE expressions expect the inactive one to show up as NULL on each
# row — confirm the target PostgreSQL version pads rows this way.
# NOTE(review): the final CASE has no alias, yet later cells read the column as
# `value`; this presumably relies on the database naming the column after the
# ELSE branch — confirm, or consider an explicit alias.
session.run_sql('''
drop table if exists flattened;
WITH RECURSIVE all_paths(release_id, path, "value") AS (
    select release_id, 
       (key_value).key "path", 
       (key_value).value "value" from 
    (select release_id, jsonb_each(releases) key_value from releases) a
  UNION ALL
    (select release_id,
           case when key_value is not null then
               a.path || '/'::text || (key_value).key::text
           else
               a.path
           end "path",
           case when a.key_value is not null then
               (a.key_value).value
           else
               array_value
           end "value"
     from
        (select 
           release_id,
           path,
           jsonb_each(case when jsonb_typeof(value) = 'object' then value else '{}'::jsonb end) key_value,
           jsonb_array_elements(case when jsonb_typeof(value) = 'array' and jsonb_typeof(value -> 0) = 'object' then value else '[]'::jsonb end) "array_value"
           from all_paths
        ) a
   )
)
SELECT release_id, path,
   case when (jsonb_typeof(value) = 'array' and jsonb_typeof(value -> 0) = 'object') or jsonb_typeof(value) = 'object' then null else value end  
   into flattened
FROM all_paths
''')

'Success'

In [6]:
# Summarise `flattened` per path: number of rows, number of distinct releases
# containing the path, and number of distinct values. Only paths that occur in
# more than 5 releases are kept, ordered by distinct releases, highest first.
path_summary_sql = """
   select path, count(*) total, count(distinct release_id) distinct_release, count(distinct value) distinct_value from flattened group by 1 having  count(distinct release_id)  > 5 order by 3  desc
"""
session.run_sql(path_summary_sql)

path,total,distinct_release,distinct_value
tender/awardPeriod/endDate,103,103,103
awards/amendment,320,103,0
awards/amendment/amendsReleaseID,320,103,281
awards/amendment/changes,1260,103,0
tender/tenderers/contactPoint/telephone,297,103,268
tender/tenderers/contactPoint/url,297,103,291
tender/tenderers/id,297,103,297
tender/tenderers/identifier,297,103,0
tender/tenderers/identifier/id,297,103,297
tender/tenderers/identifier/legalName,297,103,273


In [7]:
# Fetch the same per-path summary into Python, this time ordered by path
# length so that a shorter (parent) path always precedes its longer children.
path_stats_sql = """
select path, count(*) total, count(distinct release_id) distinct_release, count(distinct value) distinct_value from flattened group by 1 having  count(distinct release_id)  > 5 order by length(path)"""
results = session.get_results(path_stats_sql)

In [8]:
# Reshape the per-path statistics into treemap rows of the form
# [node label, parent label, total count, distinct releases].
# Labels use '.' instead of '/' as the separator; top-level paths hang off the
# synthetic 'releases' root row, every other path hangs off its own prefix.
header_row = ['Field', 'Parent', 'Total Count', 'Distinct Releases']
root_row = ['releases', None, 103, 0]
results_for_gcharts = [header_row, root_row]

for stat in results['data']:
    path = stat[0]
    # Everything before the last '/' is the parent; no '/' means top level.
    parent_path, slash, _leaf = path.rpartition('/')
    results_for_gcharts.append([
        path.replace('/', '.'),
        parent_path.replace('/', '.') if slash else 'releases',
        int(stat[1]),
        int(stat[2]),
    ])

# Preview the header, the root and the first few nodes.
results_for_gcharts[:14]

[['Field', 'Parent', 'Total Count', 'Distinct Releases'],
 ['releases', None, 103, 0],
 ['id', 'releases', 103, 103],
 ['tag', 'releases', 103, 103],
 ['ocid', 'releases', 103, 103],
 ['date', 'releases', 103, 103],
 ['buyer', 'releases', 103, 103],
 ['awards', 'releases', 423, 103],
 ['tender', 'releases', 103, 103],
 ['parties', 'releases', 412, 103],
 ['planning', 'releases', 103, 103],
 ['language', 'releases', 103, 103],
 ['buyer.id', 'buyer', 103, 103],
 ['contracts', 'releases', 407, 103]]

In [9]:
# `display` and `HTML` are imported from the public IPython.display module;
# importing `display` from IPython.core.display is deprecated.
from IPython.display import display, HTML
import json

# Render the path statistics as a Google Charts treemap inside the notebook.
# The rows are inlined into a <script> element as a JSON literal; "<" is
# escaped so that a field name containing "</script>" cannot end the script
# block early. JavaScript decodes \u003c back to "<" inside the strings.
treemap_rows_json = json.dumps(results_for_gcharts).replace('<', '\\u003c')

display(HTML(
'''
  <div id="chart_div" style="width: 900px; height: 500px;"></div>
  <script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
  <script type="text/javascript">
  // Poll until the loader script above has defined google.charts, then load
  // the treemap package once and stop polling.
  var checkExist = setInterval(function() {
    function drawChart() {
      var data = google.visualization.arrayToDataTable(%s);

      var tree = new google.visualization.TreeMap(document.getElementById('chart_div'));

      tree.draw(data, {
        minColor: '#f00',
        midColor: '#ddd',
        maxColor: '#0d0',
        headerHeight: 15,
        fontColor: 'black',
        showScale: true
      });
    }

    // typeof avoids a ReferenceError while the google global does not exist yet.
    if (typeof google === 'undefined' || !google.charts) {
      return;
    }
    clearInterval(checkExist);
    google.charts.load('current', {'packages':['treemap']});
    google.charts.setOnLoadCallback(drawChart);
  }, 100);
  </script>
''' % treemap_rows_json
))