# Dataset : structure

## Goals

- understand the structure of a Dataset object
- introduce methods for managing structure

-----

## managing Field
Field methods are available for all the Fields inside the Dataset object.


In [1]:
from observation import Sdataset

# Source data: three Fields ('month', 'city', 'temperature') with five values each.
records = {
    'month':       ['jan',   'jan',  'apr',       'apr',  'sep'     ],
    'city':        ['paris', 'lyon', 'marseille', 'lyon', 'toulouse'],
    'temperature': [ 12,      14,     21,          16,      21      ],
}
il = Sdataset.ntv(records)

# Default codec: the printed lengths match the number of distinct values
# in each Field (3 months, 4 cities, 4 temperatures).
print('codec length for Field defaultcodec : ', il.indexlen)

# Switch every Field to a full codec, in place: one codec entry per record,
# so each length becomes 5.
il.tostdcodec(inplace=True)
print('codec length for Field full codec    : ', il.indexlen)

# Go back to the default codec for every Field.
il.reindex()

# Reorder the Fields by position: old Field 2 ('temperature') comes first,
# followed by old Fields 0 ('month') and 1 ('city').
il.swapindex([2, 0, 1])
print('new Field order : ', il.lname)



codec length for Field defaultcodec :  [3, 4, 4]
codec length for Field full codec    :  [5, 5, 5]
new Field order :  ['temperature', 'month', 'city']


## ordering

Values and keys can be ordered for all Fields.

In [2]:
# Each step sorts `il` in place and prints it. The steps run in sequence, so
# every sort starts from the record order left by the previous one.
# Field positions used in `order`: 0 = temperature, 1 = month, 2 = city.
# NOTE(review): func=None presumably disables the default sort key function;
# confirm against the Sdataset.sort documentation.
sort_steps = [
    ('sorted by month and city : \n',         dict(order=[1, 2])),
    ('reverse sorted by city and month : \n', dict(order=[2, 1], reverse=True)),
    ('sorted by temperature : \n',            dict(order=[0], func=None)),
    ('sorted by city name length  : \n',      dict(order=[2], func=lambda x: len(x))),
]

for title, sort_options in sort_steps:
    il.sort(**sort_options)
    print(title, il)

sorted by month and city : 
 variables :
    {'temperature': [16, 21, 14, 12, 21]}
    {'month': ['apr', 'apr', 'jan', 'jan', 'sep']}
    {'city': ['lyon', 'marseille', 'lyon', 'paris', 'toulouse']}

reverse sorted by city and month : 
 variables :
    {'temperature': [21, 12, 21, 14, 16]}
    {'month': ['sep', 'jan', 'apr', 'jan', 'apr']}
    {'city': ['toulouse', 'paris', 'marseille', 'lyon', 'lyon']}

sorted by temperature : 
 variables :
    {'temperature': [12, 14, 16, 21, 21]}
    {'month': ['jan', 'jan', 'apr', 'apr', 'sep']}
    {'city': ['paris', 'lyon', 'lyon', 'marseille', 'toulouse']}

sorted by city name length  : 
 variables :
    {'temperature': [14, 16, 12, 21, 21]}
    {'month': ['jan', 'apr', 'jan', 'sep', 'apr']}
    {'city': ['lyon', 'lyon', 'paris', 'toulouse', 'marseille']}



## reordering
Records can be reordered for all Fields.

In [3]:
# Position i of the reordered Dataset receives the record currently at
# position record_order[i]; here the record at position 2 moves to the front
# and the last two records stay where they are.
record_order = [2, 0, 1, 3, 4]
il.reorder(record_order)
print('reordered records  : \n', il)

reordered records  : 
 variables :
    {'temperature': [12, 14, 16, 21, 21]}
    {'month': ['jan', 'jan', 'apr', 'sep', 'apr']}
    {'city': ['paris', 'lyon', 'lyon', 'toulouse', 'marseille']}



## filtering
A Dataset object can be filtered with a boolean Field.

In [4]:
# Boolean mask with one entry per record: True when the month name compares
# greater than 'b' as a string ('jan' and 'sep' pass, 'apr' does not).
filt = [month > 'b' for month in il.nindex('month')]
print(filt)

# Attach the mask to `il` as its filter Field, then derive two copies from it.
il.setfilter(filt)

# Copy holding the records whose filter value is True.
# NOTE(review): delfilter=False presumably keeps the filter Field so the
# second applyfilter call below can still use it; confirm against the API.
il2 = il.applyfilter(inplace=False, delfilter=False)

# Copy holding the complementary records (filter value is False).
il3 = il.applyfilter(inplace=False, reverse=True)

print('\nil copy with only records with true Field filter : \n', il2)
print('\nil copy with only records with false Field filter : \n', il3)

[True, True, False, True, False]

il copy with only records with true Field filter : 
 index :
    {'temperature': [21, 14, 12]}
    {'month': ['sep', 'jan', 'jan']}
    {'city': ['toulouse', 'lyon', 'paris']}


il copy with only records with false Field filter : 
 index :
    {'temperature': [16, 21]}
    {'month': ['apr', 'apr']}
    {'city': ['lyon', 'marseille']}



## adding
Two Dataset objects with different Fields can be combined with the `mix` method (the `add` method is available only if the Field names are identical between the two Datasets).

In [5]:
# Second Dataset: it shares the 'city' Field with `il` and brings a new
# 'country' Field; it has no 'month' or 'temperature' Field.
# Note: this rebinds `il2`, replacing the filtered copy created in the
# filtering cell.
city_countries = {
    'country': ['france', 'france', 'france'],
    'city':    ['paris', 'lyon', 'strasbourg'],
}
il2 = Sdataset.ntv(city_countries)

# Combine the two Datasets. In the printed result the records of `il2` follow
# those of `il`, and a Field missing on one side is filled with None.
il_total = il.mix(il2)

print('\naddition between two different Dataset objects:\n\n', il_total)


addition between two different Dataset objects:

 variables :
    {'temperature': [12, 14, 16, 21, 21, None, None, None]}
    {'month': ['jan', 'jan', 'apr', 'sep', 'apr', None, None, None]}
    {'city': ['paris', 'lyon', 'lyon', 'toulouse', 'marseille', 'paris', 'lyon', 'strasbourg']}
    {'country': [None, None, None, None, None, 'france', 'france', 'france']}

