In [1]:
import sys
# Put the parent directory at the front of the module search path.
# NOTE(review): presumably this makes `import flat_table` in the next cell resolve
# to the local checkout rather than an installed copy — confirm.
sys.path.insert(0, '..')

In [2]:
import numpy as np
import pandas as pd
import flat_table
from tabulate import tabulate

### Example Dataset

In [3]:
# Sample records, one tuple per row: (id, user_info, address).
#   * user_info is a (possibly nested) dict, or NaN when it is missing.
#   * address is a list of dicts whose keys differ from record to record.
data = [
    (
        1001,
        {
            'first_name': 'john',
            'last_name': 'smith',
            'phones': {'mobile': '201-..', 'home': '978-..'},
        },
        [
            {'zip': '07014', 'city': 'clifton'},
        ],
    ),
    (
        1002,
        np.nan,  # this record has no user_info at all
        [
            {'zip': '07014', 'address1': '1 Journal Square'},
        ],
    ),
    (
        1003,
        {
            'first_name': 'marry',
            'last_name': 'kate',
            'gender': 'female',
        },
        [
            {'zip': '10001', 'city': 'new york'},
            {'zip': '10008', 'city': 'brooklyn'},
        ],
    ),
]

In [4]:
# Load the sample records into a frame; the dict and list values stay as
# plain Python objects inside the user_info and address columns.
df = pd.DataFrame(
    data,
    columns=['id', 'user_info', 'address'],
)

In [5]:
# Show the raw frame: user_info holds dicts (or NaN) and address holds lists of dicts.
df

Unnamed: 0,id,user_info,address
0,1001,"{'first_name': 'john', 'last_name': 'smith', '...","[{'zip': '07014', 'city': 'clifton'}]"
1,1002,,"[{'zip': '07014', 'address1': '1 Journal Squar..."
2,1003,"{'first_name': 'marry', 'last_name': 'kate', '...","[{'zip': '10001', 'city': 'new york'}, {'zip':..."


### Using flat_table

In [6]:
# Flatten the whole frame in one call. In the output below, nested dict keys show up
# as dotted column names (e.g. user_info.phones.home) and every element of an address
# list gets its own row; the `index` column carries the position of the source row.
flat_table.normalize(df)

Unnamed: 0,index,id,user_info.gender,user_info.phones.home,user_info.phones.mobile,user_info.last_name,user_info.first_name,address.address1,address.city,address.zip
0,0,1001,,978-..,201-..,smith,john,,clifton,7014
1,1,1002,,,,,,1 Journal Square,,7014
2,2,1003,female,,,kate,marry,,new york,10001
3,2,1003,female,,,kate,marry,,brooklyn,10008


### Mapper function

In [7]:
# Build the column mapping for df and blank out the `obj` column with a
# placeholder so the table printed below stays compact.
mapper = flat_table.mapper(df).assign(obj='...')
mapper

Unnamed: 0,parent,child,type,obj
0,.,id,int,...
1,.,user_info,dict,...
2,user_info,user_info.gender,str,...
3,user_info,user_info.phones.home,str,...
4,user_info,user_info.phones.mobile,str,...
5,user_info,user_info.last_name,str,...
6,user_info,user_info.first_name,str,...
7,.,address,list,...
8,,address,dict,...
9,address,address.address1,str,...


In [8]:
# Print the mapper as a plain-text Markdown (pipe) table.
print(mapper.to_markdown())

|    | parent    | child                   | type   | obj   |
|---:|:----------|:------------------------|:-------|:------|
|  0 | .         | id                      | int    | ...   |
|  1 | .         | user_info               | dict   | ...   |
|  2 | user_info | user_info.gender        | str    | ...   |
|  3 | user_info | user_info.phones.home   | str    | ...   |
|  4 | user_info | user_info.phones.mobile | str    | ...   |
|  5 | user_info | user_info.last_name     | str    | ...   |
|  6 | user_info | user_info.first_name    | str    | ...   |
|  7 | .         | address                 | list   | ...   |
|  8 |           | address                 | dict   | ...   |
|  9 | address   | address.address1        | str    | ...   |
| 10 | address   | address.city            | str    | ...   |
| 11 | address   | address.zip             | str    | ...   |


In [9]:
# Same normalize() call as above, this time kept under a name so the
# flattened frame can be rendered as Markdown in the next cell.
final = flat_table.normalize(df)
final

Unnamed: 0,index,id,user_info.gender,user_info.phones.home,user_info.phones.mobile,user_info.last_name,user_info.first_name,address.address1,address.city,address.zip
0,0,1001,,978-..,201-..,smith,john,,clifton,7014
1,1,1002,,,,,,1 Journal Square,,7014
2,2,1003,female,,,kate,marry,,new york,10001
3,2,1003,female,,,kate,marry,,brooklyn,10008


In [10]:
# Print the fully flattened frame as a plain-text Markdown (pipe) table.
print(final.to_markdown())

|    |   index |   id | user_info.gender   | user_info.phones.home   | user_info.phones.mobile   | user_info.last_name   | user_info.first_name   | address.address1   | address.city   |   address.zip |
|---:|--------:|-----:|:-------------------|:------------------------|:--------------------------|:----------------------|:-----------------------|:-------------------|:---------------|--------------:|
|  0 |       0 | 1001 | nan                | 978-..                  | 201-..                    | smith                 | john                   | nan                | clifton        |         07014 |
|  1 |       1 | 1002 | nan                | nan                     | nan                       | nan                   | nan                    | 1 Journal Square   | nan            |         07014 |
|  2 |       2 | 1003 | female             | nan                     | nan                       | kate                  | marry                  | nan                | new york       |         10001 |
|  3 |       2 | 1003 | female             | nan                     | nan                       | kate                  | marry                  | nan                | brooklyn       |         10008 |

### New in Version 1.1.0

In [11]:
# Expand only the list column: in the output below user_info is left as raw
# dicts while each address element still becomes its own row.
# NOTE: this rebinds `final`, replacing the fully flattened frame from the earlier cell.
final = flat_table.normalize(
    df,
    expand_dicts=False,
    expand_lists=True,
)
final

Unnamed: 0,index,id,user_info,address.address1,address.city,address.zip
0,0,1001,"{'first_name': 'john', 'last_name': 'smith', '...",,clifton,7014
1,1,1002,,1 Journal Square,,7014
2,2,1003,"{'first_name': 'marry', 'last_name': 'kate', '...",,new york,10001
3,2,1003,"{'first_name': 'marry', 'last_name': 'kate', '...",,brooklyn,10008


In [12]:
# Print the lists-only expansion as a plain-text Markdown (pipe) table.
print(final.to_markdown())

|    |   index |   id | user_info                                                                                      | address.address1   | address.city   |   address.zip |
|---:|--------:|-----:|:-----------------------------------------------------------------------------------------------|:-------------------|:---------------|--------------:|
|  0 |       0 | 1001 | {'first_name': 'john', 'last_name': 'smith', 'phones': {'mobile': '201-..', 'home': '978-..'}} | nan                | clifton        |         07014 |
|  1 |       1 | 1002 | nan                                                                                            | 1 Journal Square   | nan            |         07014 |
|  2 |       2 | 1003 | {'first_name': 'marry', 'last_name': 'kate', 'gender': 'female'}                               | nan                | new york       |         10001 |
|  3 |       2 | 1003 | {'first_name': 'marry', 'last_name': 'kate', 'gender': 'female'}                               | nan                | brooklyn       |         10008 |

### Comparison with json_normalize()

In [13]:
# json_normalize() fails on this column: the user_info value for id 1002 is NaN,
# a float, which has none of the dict methods json_normalize() relies on.
# The failure is the point of the comparison, so catch it and print it
# instead of letting the exception abort a "Restart & Run All".
try:
    pd.json_normalize(df.user_info, max_level=0)
except AttributeError as err:
    # Same "ExceptionType: message" form as an uncaught traceback's last line.
    print(f"{type(err).__name__}: {err}")

AttributeError: 'float' object has no attribute 'items'