# Dataset : aggregation

## Goals

- understand the Dataset aggregation process

<img src="https://loco-philippe.github.io/ES/ilist_aggregation.png" width="800">


-----


In [1]:
from observation import Sdataset
from pprint import pprint

## example
In this example, the documentation process has two steps:
- a simple documentation for each person
- an aggregation of all results

The merge method provides a complete Dataset image of all the results (aggregation is the reference, merge is the image)

<img src="https://loco-philippe.github.io/ES/ilist_merge.png" width="700">


In [2]:
# Per-person result sets: one Sdataset per person, each holding that person's
# scores together with the matching course, year and examen values.
# NOTE: the key order of every dict is kept exactly as authored (pw lists
# 'year' before 'course'); the serialised representations follow that order.
aw = Sdataset.ntv({
    'score':  [11, 13, 15, 10, 12],
    'course': ['math', 'math', 'math', 'english', 'english'],
    'year':   [2021, 2021, 2021, 2021, 2021],
    'examen': ['t1', 't2', 't3', 't2', 't3'],
})
pw = Sdataset.ntv({
    'score':  [15, 8],
    'year':   [2021, 2021],
    'course': ['math', 'english'],
    'examen': ['t1', 't2'],
})
cr = Sdataset.ntv({
    'score':  [17, 18, 2, 4],
    'course': ['software', 'software', 'english', 'english'],
    'year':   [2021, 2021, 2021, 2021],
    'examen': ['t3', 't2', 't1', 't2'],
})
pb = Sdataset.ntv({
    'score':  [18, 6],
    'course': ['software', 'english'],
    'year':   [2021, 2021],
    'examen': ['t3', 't1'],
})

# Aggregation: the 'aggregate' field stores the four per-person Sdatasets,
# the other fields describe the person each of them belongs to.
# fast=True is passed through to Sdataset.ntv as in the original call
# (presumably a lighter construction path -- TODO confirm against the library).
total = Sdataset.ntv(
    {
        'aggregate':  [aw, pw, cr, pb],
        'full name':  ['anne white', 'philippe white', 'camille red', 'philippe black'],
        'last name':  ['white', 'white', 'red', 'black'],
        'first name': ['anne', 'philippe', 'camille', 'philippe'],
        'surname':    ['skyler', 'heisenberg', 'saul', 'gus'],
        'group':      ['gr1', 'gr2', 'gr3', 'gr3'],
    },
    fast=True,
)

# Show that the aggregated Dataset answers the same queries as a plain one:
# record count, field count, then its CBOR-encoded and JSON representations.
print('the aggregate Dataset has the same property as non aggregate Dataset :\n')
print('record number : ', len(total), 'Field number : ', total.lenindex)
print(
    '\nbinary representation : \n',
    total.to_ntv().to_obj(encoded=True, format='cbor'),
)
print('\njson representation : \n', total.to_ntv())

the aggregate Dataset has the same property as non aggregate Dataset :

record number :  4 Field number :  6

binary representation : 
 b'\xa6naggregate::tab\x84\xa4escore\x85\x0b\r\x0f\n\x0cfcourse\x82\x82dmathgenglish\x85\x00\x00\x00\x01\x01dyear\x19\x07\xe5fexamen\x82\x83bt1bt2bt3\x85\x00\x01\x02\x01\x02\xa4escore\x82\x0f\x08dyear\x19\x07\xe5fcourse\x82dmathgenglishfexamen\x82bt1bt2\xa4escore\x84\x11\x12\x02\x04fcourse\x82\x82hsoftwaregenglish\x81\x02dyear\x19\x07\xe5fexamen\x82\x83bt3bt2bt1\x84\x00\x01\x02\x01\xa4escore\x82\x12\x06fcourse\x82hsoftwaregenglishdyear\x19\x07\xe5fexamen\x82bt3bt1ifull name\x84janne whitenphilippe whitekcamille rednphilippe blackilast name\x82\x83ewhitecredeblack\x84\x00\x00\x01\x02jfirst name\x82\x83dannehphilippegcamille\x84\x00\x01\x02\x01gsurname\x84fskylerjheisenbergdsaulcgusegroup\x82\x83cgr1cgr2cgr3\x84\x00\x01\x02\x02'

json representation : 
 {"aggregate::tab": [{"score": [11, 13, 15, 10, 12], "course": [["math", "english"], [0, 0, 0, 1, 1]], "

In [3]:
# Build a single Dataset image of all the results held in the aggregated
# Dataset `total`.
# simplename=True: presumably keeps the short field names in the merged
# result -- TODO confirm against the library documentation.
img = total.merge(simplename=True)
print('complete image :')
# Last expression of the cell: renders the merged records as an HTML table.
img.view(tablefmt='html')

complete image :


score,course,year,examen,full name,last name,first name,surname,group
11,"""math""",2021,"""t1""","""anne white""","""white""","""anne""","""skyler""","""gr1"""
13,"""math""",2021,"""t2""","""anne white""","""white""","""anne""","""skyler""","""gr1"""
15,"""math""",2021,"""t3""","""anne white""","""white""","""anne""","""skyler""","""gr1"""
10,"""english""",2021,"""t2""","""anne white""","""white""","""anne""","""skyler""","""gr1"""
12,"""english""",2021,"""t3""","""anne white""","""white""","""anne""","""skyler""","""gr1"""
15,"""math""",2021,"""t1""","""philippe white""","""white""","""philippe""","""heisenberg""","""gr2"""
8,"""english""",2021,"""t2""","""philippe white""","""white""","""philippe""","""heisenberg""","""gr2"""
17,"""software""",2021,"""t3""","""camille red""","""red""","""camille""","""saul""","""gr3"""
18,"""software""",2021,"""t2""","""camille red""","""red""","""camille""","""saul""","""gr3"""
2,"""english""",2021,"""t1""","""camille red""","""red""","""camille""","""saul""","""gr3"""


In [4]:
# Convert the 'score' field of img to its standard codec, modifying img in
# place (presumably one codec value per record -- TODO confirm), then print
# the resulting representation.
img.nindex('score').tostdcodec(inplace=True)
print('\ncomplete image optimize format: \n', img.to_ntv())

# Save the image as a CBOR file so that it can be reused in other Notebook.
img.to_file('score.il', format='cbor')

# Second image: a fresh merge of `total`, completed over the
# 'course' / 'full name' / 'examen' fields and put in canonical order.
# NOTE(review): .full() appears to add the missing field combinations
# (shown as "-" in the score column of the view) -- confirm with the library.
img2 = (
    total.merge(simplename=True)
    .full(idxname=['course', 'full name', 'examen'], inplace=False)
    .setcanonorder()
)
img2.nindex('score').tostdcodec(inplace=True)
print('\ncomplete image optimize full format: \n', img2.to_ntv())
# Last expression of the cell: renders the full image as an HTML table.
img2.view(tablefmt='html')


complete image optimize format: 
 {"score": [11, 13, 15, 10, 12, 15, 8, 17, 18, 2, 4, 18, 6], "course": [["math", "english", "software"], [0, 0, 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 1]], "year": 2021, "examen": [["t1", "t2", "t3"], [0, 1, 2, 1, 2, 0, 1, 2, 1, 0, 1, 2, 0]], "full name": [["anne white", "philippe white", "camille red", "philippe black"], [0, 0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 3]], "last name": [["white", "red", "black"], 4, [0, 0, 1, 2]], "first name": [["anne", "philippe", "camille"], 4, [0, 1, 2, 1]], "surname": [["skyler", "heisenberg", "saul", "gus"], 4], "group": [["gr1", "gr2", "gr3"], 4, [0, 1, 2, 2]]}

complete image optimize full format: 
 {"course": [["math", "english", "software"], [12]], "examen": [["t1", "t2", "t3"], [4]], "full name": [["anne white", "philippe white", "camille red", "philippe black"], [1]], "last name": [["white", "red", "black"], 2, [0, 0, 1, 2]], "first name": [["anne", "philippe", "camille"], 2, [0, 1, 2, 1]], "group": [["gr1", "gr2", "gr3"], 2,

course,examen,full name,last name,first name,group,surname,score,year
"""math""","""t1""","""anne white""","""white""","""anne""","""gr1""","""skyler""",11,2021
"""math""","""t1""","""philippe white""","""white""","""philippe""","""gr2""","""heisenberg""",15,2021
"""math""","""t1""","""camille red""","""red""","""camille""","""gr3""","""saul""","""-""",2021
"""math""","""t1""","""philippe black""","""black""","""philippe""","""gr3""","""gus""","""-""",2021
"""math""","""t2""","""anne white""","""white""","""anne""","""gr1""","""skyler""",13,2021
"""math""","""t2""","""philippe white""","""white""","""philippe""","""gr2""","""heisenberg""","""-""",2021
"""math""","""t2""","""camille red""","""red""","""camille""","""gr3""","""saul""","""-""",2021
"""math""","""t2""","""philippe black""","""black""","""philippe""","""gr3""","""gus""","""-""",2021
"""math""","""t3""","""anne white""","""white""","""anne""","""gr1""","""skyler""",15,2021
"""math""","""t3""","""philippe white""","""white""","""philippe""","""gr2""","""heisenberg""","""-""",2021
