There are many ways of doing this, some faster and some slower, but I prefer an approach that does not require manually selecting the columns I would like to flatten. So this is my way.

In [None]:
# Import Libraries
import json
import pandas as pd

In [None]:
# Open the JSON file and parse it; the context manager closes the file
# even if parsing raises, so no handle is leaked
with open('../input/tiktok-trending-december-2020/trending.json', encoding="utf8") as file:
    data = json.load(file)

# Show amount of objects
len(data['collector'])

In [None]:
# Pretty-print one collected object (index 4) to inspect its nested structure
example_object = data['collector'][4]
pretty_example = json.dumps(example_object, sort_keys=True, indent=4)
print(pretty_example)

## Fast Method
So we would like to parse the JSON data in the following way:
- `hashtags` and `mentions` are lists, so we would like to "explode" them: split each list and store its elements as separate rows, copying all other fields into each row;
    - `hashtags` contains a list of objects, so we would like to split those objects into separate columns afterwards;
- Split every other cell containing objects to separate columns

In [None]:
%%timeit

# Flatten the nested objects into dotted columns and store everything as a DataFrame
df = pd.json_normalize(data['collector'])

# Explode the fields containing lists, so every list element gets its own row
df = df.explode('hashtags').explode('mentions')

# Serialize the exploded frame to JSON records so it can be normalized again;
# a separate name is used so `df` always refers to a DataFrame
records_json = df.to_json(orient='records')

# Parse the JSON data
parsed_json = json.loads(records_json)

# Normalize again: the exploded hashtag objects are now split into columns
df = pd.json_normalize(parsed_json)

# Drop the leftover 'hashtags' column. It is only present when some record
# carries a non-object value there (e.g. null), so ignore its absence
# instead of raising KeyError.
df = df.drop(columns='hashtags', errors='ignore')
df

In [None]:
# Flatten the nested objects into dotted columns and store everything as a DataFrame
df = pd.json_normalize(data['collector'])

# Explode the fields containing lists, so every list element gets its own row
df = df.explode('hashtags').explode('mentions')

# Serialize the exploded frame to JSON records so it can be normalized again;
# a separate name is used so `df` always refers to a DataFrame
records_json = df.to_json(orient='records')

# Parse the JSON data
parsed_json = json.loads(records_json)

# Normalize again: the exploded hashtag objects are now split into columns
df = pd.json_normalize(parsed_json)

# Drop the leftover 'hashtags' column. It is only present when some record
# carries a non-object value there (e.g. null), so ignore its absence
# instead of raising KeyError.
df = df.drop(columns='hashtags', errors='ignore')

In [None]:
# Display the DataFrame produced by the fast method
df

In [None]:
# Print a summary (columns, non-null counts, dtypes) of the fast-method DataFrame
df.info()

## Slow Method

In [None]:
%%timeit 

def object_to_columns(dfRow, **kwargs):
    """Expand dictionary-valued cells of a row into prefixed entries.

    Each keyword argument maps a label in ``dfRow`` to the prefix used for
    the new entries. When the cell under that label holds a dict, every
    key/value pair is written back to the row as ``'<prefix>.<key>'``.
    Cells that are not dicts are left untouched. The row is modified in
    place and also returned.
    """
    for source_label, name_prefix in kwargs.items():
        cell = dfRow[source_label]
        if not isinstance(cell, dict):
            continue
        for sub_key in cell:
            dfRow[f'{name_prefix}.{sub_key}'] = cell[sub_key]
    return dfRow

# Columns holding dictionaries, mapped to the prefix used for their expanded columns
prefix_by_column = {
    'authorMeta': 'authorMeta',
    'musicMeta': 'musicMeta',
    'covers': 'cover',
    'videoMeta': 'videoMeta',
    'hashtags': 'hashtag',
}

df_tiktok_dataset = (
    # Build a DataFrame straight from the raw objects
    pd.DataFrame(data['collector'])
    # Expand the list-valued hashtags and mentions cells to multiple rows
    .explode('hashtags')
    .explode('mentions')
    # Expand the dictionary cells to columns, one row at a time
    .apply(object_to_columns, axis=1, **prefix_by_column)
    # Remove the original columns containing the dictionaries
    .drop(columns=list(prefix_by_column))
)


In [None]:
def object_to_columns(dfRow, **kwargs):
    """Expand dictionary-valued cells of a row into prefixed entries.

    Each keyword argument maps a label in ``dfRow`` to the prefix used for
    the new entries. When the cell under that label holds a dict, every
    key/value pair is written back to the row as ``'<prefix>.<key>'``.
    Cells that are not dicts are left untouched. The row is modified in
    place and also returned.
    """
    for source_label, name_prefix in kwargs.items():
        cell = dfRow[source_label]
        if not isinstance(cell, dict):
            continue
        for sub_key in cell:
            dfRow[f'{name_prefix}.{sub_key}'] = cell[sub_key]
    return dfRow

# Columns holding dictionaries, mapped to the prefix used for their expanded columns
prefix_by_column = {
    'authorMeta': 'authorMeta',
    'musicMeta': 'musicMeta',
    'covers': 'cover',
    'videoMeta': 'videoMeta',
    'hashtags': 'hashtag',
}

df_tiktok_dataset = (
    # Build a DataFrame straight from the raw objects
    pd.DataFrame(data['collector'])
    # Expand the list-valued hashtags and mentions cells to multiple rows
    .explode('hashtags')
    .explode('mentions')
    # Expand the dictionary cells to columns, one row at a time
    .apply(object_to_columns, axis=1, **prefix_by_column)
    # Remove the original columns containing the dictionaries
    .drop(columns=list(prefix_by_column))
)

In [None]:
# Display the DataFrame produced by the slow method
df_tiktok_dataset

In [None]:
# Print a summary (columns, non-null counts, dtypes) of the slow-method DataFrame
df_tiktok_dataset.info()