# JSON to CSV the right way

In [1]:
import pandas as pd

In [2]:
#____________________________________________________________________ SIMPLE JSON AND LIST
# Two flat (single-level) records, followed by a list that holds both of them.
simple_json = dict(name='David', city='London', income=80000)
simple_json_2 = dict(name='Taylor', city='Chicago', income=120000)

# The list references the two dicts above (it does not copy them).
simple_json_list = [simple_json, simple_json_2]

#____________________________________________________________________ MULTIPLE LEVELS JSON AND LIST

# Records whose 'skills' and 'roles' values are themselves dictionaries.
multiple_levels_json = dict(
    name='David',
    city='London',
    income=80000,
    skills=dict(python='advanced', SQL='advanced', GCP='mid'),
    roles={
        "project manager": False,
        "data engineer": False,
        "data scientist": True,
        "data analyst": False,
    },
)

multiple_levels_json_2 = dict(
    name='Taylor',
    city='Chicago',
    income=120000,
    skills=dict(python='mid', SQL='advanced', GCP='beginner'),
    roles={
        "project manager": False,
        "data engineer": False,
        "data scientist": False,
        "data analyst": True,
    },
)

# The list references the two nested records above.
multiple_level_json_list = [multiple_levels_json, multiple_levels_json_2]


#____________________________________________________________________ MULTIPLE LEVELS JSON AND LIST WITH NESTED LIST

# Records where 'skills' is a list of strings and 'roles' is a dictionary.
nested_json = dict(
    name='David',
    city='London',
    income=80000,
    skills=["python", "SQL", "GCP"],
    roles={
        "project manager": False,
        "data engineer": False,
        "data scientist": True,
        "data analyst": False,
    },
)

# This record's 'skills' list has a different length than the first one's.
nested_json_2 = dict(
    name='Taylor',
    city='Chicago',
    income=120000,
    skills=["python", "SQL", "PowerBI", "Looker"],
    roles={
        "project manager": False,
        "data engineer": False,
        "data scientist": False,
        "data analyst": True,
    },
)

# The list references the two records above.
nested_json_list = [nested_json, nested_json_2]

## Converting a simple JSON
### A single dictionary

In [3]:
# Normalize one flat dict: the result shown below has a single row with one
# column per key (name, city, income).
pd.json_normalize(simple_json)

Unnamed: 0,name,city,income
0,David,London,80000


### A list of dictionaries

In [4]:
# Normalize a list of flat dicts: the result shown below has one row per dict.
pd.json_normalize(simple_json_list)

Unnamed: 0,name,city,income
0,David,London,80000
1,Taylor,Chicago,120000


In [5]:
# Redefine the flat records so that the first one has no 'income' key.
simple_json = dict(name='David', city='London')
simple_json_2 = dict(name='Taylor', city='Chicago', income=120000)
simple_json_list = [simple_json, simple_json_2]

# In the result shown below, David's income cell is empty and Taylor's
# income is rendered as 120000.0.
pd.json_normalize(simple_json_list)

Unnamed: 0,name,city,income
0,David,London,
1,Taylor,Chicago,120000.0


## Selecting only the columns of interest

In [6]:
# Keys to keep in every record
fields = ['name', 'city']

# Rebuild each record with only the keys listed in `fields`, keeping the
# record's own key order; keys not listed (e.g. 'income') are dropped.
filtered_json_list = [
    {column: record[column] for column in record if column in fields}
    for record in simple_json_list
]

pd.json_normalize(filtered_json_list)

Unnamed: 0,name,city
0,David,London
1,Taylor,Chicago


## Converting a JSON with multiple levels
### A single dictionary

In [7]:
# Normalize a dict with nested dicts: in the result shown below every nested
# key becomes its own 'parent.child' column (e.g. skills.python,
# roles.data analyst).
pd.json_normalize(multiple_levels_json)

Unnamed: 0,name,city,income,skills.python,skills.SQL,skills.GCP,roles.project manager,roles.data engineer,roles.data scientist,roles.data analyst
0,David,London,80000,advanced,advanced,mid,False,False,True,False


In [8]:
# With max_level=0 the nested dicts are not expanded: in the result shown
# below 'skills' and 'roles' stay as single columns holding the raw dicts.
pd.json_normalize(multiple_levels_json, max_level=0)

Unnamed: 0,name,city,income,skills,roles
0,David,London,80000,"{'python': 'advanced', 'SQL': 'advanced', 'GCP...","{'project manager': False, 'data engineer': Fa..."


In [14]:
# Same call as the earlier cell without max_level: all nested keys are
# expanded into 'parent.child' columns again.
# NOTE(review): presumably repeated to contrast with the max_level=0 cell;
# confirm the intent or drop the duplicate.
pd.json_normalize(multiple_levels_json)

Unnamed: 0,name,city,income,skills.python,skills.SQL,skills.GCP,roles.project manager,roles.data engineer,roles.data scientist,roles.data analyst
0,David,London,80000,advanced,advanced,mid,False,False,True,False


### A list of dictionaries

In [10]:
# Normalize a list of nested dicts: the result shown below has one row per
# dict, with the nested keys expanded into 'parent.child' columns.
pd.json_normalize(multiple_level_json_list)

Unnamed: 0,name,city,income,skills.python,skills.SQL,skills.GCP,roles.project manager,roles.data engineer,roles.data scientist,roles.data analyst
0,David,London,80000,advanced,advanced,mid,False,False,True,False
1,Taylor,Chicago,120000,mid,advanced,beginner,False,False,False,True


## Converting a JSON with multiple levels and a nested list
### A list of dictionaries and a single dictionary

In [13]:
# Normalize records that contain a list: in the result shown below the
# 'roles' dict is expanded into 'roles.*' columns, while the 'skills' list
# is kept whole in a single 'skills' column.
pd.json_normalize(nested_json_list)

Unnamed: 0,name,city,income,skills,roles.project manager,roles.data engineer,roles.data scientist,roles.data analyst
0,David,London,80000,"[python, SQL, GCP]",False,False,True,False
1,Taylor,Chicago,120000,"[python, SQL, PowerBI, Looker]",False,False,False,True


In [12]:
# Use the 'skills' list as the record path: the result shown below has one row
# per skill (in a column named 0), with 'name' and 'city' repeated on each row
# as metadata.
pd.json_normalize(nested_json, record_path=['skills'], meta=['name', 'city'])

Unnamed: 0,0,name,city
0,python,David,London
1,SQL,David,London
2,GCP,David,London
