## a JSON format compatible with the Pandas data structure
### introduction
The data type is not explicitly taken into account in the current JSON interface.
    
The existing solution is to supply a separate data schema in addition to the data.

### proposal
For a simple and compact solution, I propose using the [JSON-NTV format (Named and Typed Value)](https://github.com/loco-philippe/NTV#readme) - which integrates the notion of type - and its JSON-TAB variant for tabular data.

### content
This notebook uses examples to present some key points.

In [1]:
import pprint
from datetime import date, datetime

import pandas as pd
from json_ntv import NtvSingle, NtvList, Ntv, NtvConnector, NtvType, Namespace
from observation import Ilist
# Compact pretty-printer, 100 columns wide; `pr` is a short alias for its print method.
pp = pprint.PrettyPrinter(width=100, compact=True)
pr = pp.pprint

In [2]:
# Two unnamed categorical Series: one holding strings, the other integers.
s_obj = pd.Series(list("abca"), dtype="category")
i_obj = pd.Series([1, 2] * 2, dtype="category")

# Print the NTV form of each Series next to its label.
for label, series in (('categorical object Series  : ', s_obj),
                      ('categorical integer Series : ', i_obj)):
    print(label, Ntv.obj(series))

categorical object Series  :  {":field": [["a", "b", "c"], [0, 1, 2, 0]]}
categorical integer Series :  {":field": [[1, 2], [0, 1, 0, 1]]}


In [4]:
# Same categorical Series as before, this time carrying a name.
s_obj = pd.Series(list("abca"), name='string', dtype="category")
i_obj = pd.Series([1, 2] * 2, name='integer', dtype="category")

# Print the NTV form of each named Series next to its label.
for label, series in (('categorical object Series  : ', s_obj),
                      ('categorical integer Series : ', i_obj)):
    print(label, Ntv.obj(series))

categorical object Series  :  {":field": {"string": [["a", "b", "c"], [0, 1, 2, 0]]}}
categorical integer Series :  {":field": {"integer": [[1, 2], [0, 1, 0, 1]]}}


In [7]:
# DataFrame whose two columns are both categorical.
s_df = pd.DataFrame(
    {"A": ["a", "b", "c", "a"], "B": ["b", "c", "c", "d"]},
    dtype="category",
)

# Heading on the first line, NTV form of the DataFrame on the next one.
print('categorical object DataFrame  : ', Ntv.obj(s_df), sep='\n')

categorical object DataFrame  : 
{":tab": {"index": [0, 1, 2, 3], "A": [["a", "b", "c"], [0, 1, 2, 0]], "B": [["b", "c", "d"], [0, 1, 1, 2]]}}


In [None]:


# JSON input for one field and for one table; keys written 'name::type'
# carry an NTV type next to the column name.
field_data = {'dates::datetime': ['1964-01-01', '1985-02-05', '2022-01-21']}
tab_data = {
    'index':           [1, 2, 3],
    'dates::datetime': ['1964-01-01', '1985-02-05', '2022-01-21'],
    'value':           [10, 20, 30],
    'value32::int32':  [10, 20, 30],
    'coord::point':    [[1, 2], [3, 4], [5, 6]],
    'names::string':   ['john', 'eric', 'judith'],
}

# `dicobj` associates an NTV type with a connector: 'field' with the Series
# connector, 'tab' with the DataFrame connector.
field = Ntv.obj({':field': field_data})
sr = field.to_obj(dicobj={'field': 'SeriesConnec'}, encode_format='obj')

tab = Ntv.obj({':tab': tab_data})
df = tab.to_obj(dicobj={'tab': 'DataFrameConnec'}, encode_format='obj')

# The pandas dtypes are expected to conform to the NTV types.
print(sr, '\n')
print(df.dtypes)
df

In [None]:
# Round trip DataFrame -> JSON-NTV -> DataFrame: the result should be
# identical to the initial DataFrame.
ntv_from_df = Ntv.obj(df)
df2 = ntv_from_df.to_obj(dicobj={'tab': 'DataFrameConnec'}, encode_format='obj')
print('df2 is identical to df ? ', df2.equals(df))


In [None]:

# Six-row table mixing typed keys ('name::type'), untyped keys, named values
# inside a list ({'valid?': 30}), a dict-valued field ('res') and a
# single-valued field ('unique:boolean').
data = {
    'index':          [100, 200, 300, 400, 500, 600],
    'dates::date':    ['1964-01-01', '1985-02-05', '2022-01-21',
                       '1964-01-01', '1985-02-05', '2022-01-21'],
    'value':          [10, 10, 20, 20, {'valid?': 30}, 30],
    'value32::int32': [12, 12, 22, 22, 32, 32],
    'res':            {'res1': 10, 'res2': 20, 'res3': 30,
                       'res4': 10, 'res5': 20, 'res6': 30},
    'coord::point':   [[1, 2], [3, 4], [5, 6], [7, 8],
                       {'same as 2nd point': [3, 4]}, [5, 6]],
    'names::string':  ['john', 'eric', 'judith', 'mila', 'hector', 'maria'],
    'unique:boolean': True,
}

tab_ntv = Ntv.obj({':tab': data})
df = tab_ntv.to_obj(dicobj={'tab': 'DataFrameConnec'}, encode_format='obj')

# Check that the types are preserved.
print(df.dtypes)

# Check that the index is preserved.
df


In [None]:
# Pass the DataFrame's NTV value through an Ilist and export it in 'full'
# mode, then rebuild a DataFrame from that export.
# `Ilist` comes from the `observation` package, imported with the other
# dependencies in the notebook's import cell.
ntv2 = Ilist.ntv(Ntv.obj(df).val).to_ntv('full')
print(ntv2)

df2 = Ntv.obj({':tab': ntv2.to_obj()}).to_obj(
    encode_format='obj', dicobj={'tab': 'DataFrameConnec'})

# Types and content of the rebuilt DataFrame.
print(df2.dtypes)
df2

In [None]:
# Same path as the 'full' export, but starting from the 'default' Ilist export.
ntv2 = Ilist.ntv(Ntv.obj(df).val).to_ntv('default')
print(ntv2)

# The 'default' export is converted back to the 'full' form before it is
# handed to the DataFrame connector.
# NOTE(review): presumably the connector needs full-length fields - confirm.
ntv_full = Ilist.ntv(ntv2).to_ntv('full')
df3 = Ntv.obj({':tab': ntv_full.to_obj()}).to_obj(
    encode_format='obj', dicobj={'tab': 'DataFrameConnec'})

# Types and content of the rebuilt DataFrame.
print(df3.dtypes)
df3

In [None]:
# Same path again, calling `to_ntv()` without an explicit mode.
ntv2 = Ilist.ntv(Ntv.obj(df).val).to_ntv()
print(ntv2)

# The export is converted back to the 'full' form before it is handed to the
# DataFrame connector.
# NOTE(review): presumably the connector needs full-length fields - confirm.
ntv_full = Ilist.ntv(ntv2).to_ntv('full')
df3 = Ntv.obj({':tab': ntv_full.to_obj()}).to_obj(
    encode_format='obj', dicobj={'tab': 'DataFrameConnec'})

# Types and content of the rebuilt DataFrame.
print(df3.dtypes)
df3

In [None]:
# Three-row table with typed keys, a named value inside a list
# ({'valid?': 30}), a dict-valued field ('res') and a single-valued field
# ('unique:boolean').
data = {
    'index':           [10, 20, 30],
    'dates::datetime': ['1964-01-01', '1985-02-05', '2022-01-21'],
    'value':           [10, 20, {'valid?': 30}],
    'value32::int32':  [10, 20, 30],
    'res':             {'res1': 10, 'res2': 20, 'res3': 30},
    'coord::point':    [[1, 2], [3, 4], [5, 6]],
    'names::string':   ['john', 'eric', 'judith'],
    'unique:boolean':  True,
}

tab_ntv = Ntv.obj({':tab': data})
df = tab_ntv.to_obj(dicobj={'tab': 'DataFrameConnec'}, encode_format='obj')

# Types, then content, of the resulting DataFrame.
print(df.dtypes)
df
