In [6]:
import json
import pandas as pd
from pandas.io.json import json_normalize

from datetime import datetime

In [7]:
# Account names to process; each name is substituted into the path of that
# account's JSON file when the data is loaded.
accounts_list = [
    'burberry', 'gucci', 'toryburch', 'michaelkors', 'bananarepublic',
    'majeofficiel', 'aliceandolivia', 'coach', 'ferragamo', 'chloe',
]

In [12]:
# Make 2 dataframes, one for account info, one for posts

# Hold each account's dfs to concat later
account_info_df_list = []
account_posts_dfs_list = []

# Process each account json file
for account in accounts_list:

    # Load the json file. The context manager closes the file handle as soon
    # as parsing finishes instead of leaving it open until garbage collection.
    with open(f'data/appended/appended_{account}.json') as account_file:
        account_dict = json.load(account_file)

    # --- Account-level info -------------------------------------------------
    # Flatten the top level of the JSON into a one-row dataframe.
    # pd.json_normalize (pandas >= 1.0) is the public entry point; the
    # pandas.io.json.json_normalize alias is deprecated.
    account_info_df = pd.json_normalize(account_dict)
    # Trim down the dataframe for only essential columns
    account_info_df = account_info_df[[
        'account_name', 'follower_count',
        'following_count', 'post_count'
    ]]
    # Add to list to concat later
    account_info_df_list.append(account_info_df)

    # --- Post-level data ----------------------------------------------------
    # One row per entry of the account's 'post_data_list'
    posts_data_list = account_dict['post_data_list']
    account_posts_df = pd.json_normalize(posts_data_list)

    # Add a column for account name to use as key later
    account_posts_df['account_name'] = account_dict['account_name']

    # Parse the datetime strings in one vectorized call. The explicit format
    # keeps the parse strict: a string that does not match raises instead of
    # being guessed at. The trailing '.000Z' is matched literally, so the
    # resulting timestamps are timezone-naive.
    account_posts_df['post_datetime'] = pd.to_datetime(
        account_posts_df['post_datetime_str'],
        format='%Y-%m-%dT%H:%M:%S.000Z',
    )

    # Rearrange columns for easier reading (this also drops every column
    # that is not listed, including the raw 'post_datetime_str')
    account_posts_df = account_posts_df[[
        'account_name', 'post_datetime', 'post_type',
        'like_count', 'view_count',
        'has_hashtag', 'hashtag_count', 'has_at', 'at_count',
        'post_url'
    ]]

    # Add to list to concat later
    account_posts_dfs_list.append(account_posts_df)

In [15]:
# Stack the per-account frames row-wise into the two final dataframes:
# one for account info, one for posts.
all_accounts_info_df, all_accounts_posts_df = (
    pd.concat(per_account_frames)
    for per_account_frames in (account_info_df_list, account_posts_dfs_list)
)

In [18]:
# Write each final dataframe to its own CSV file, leaving out the index column
for frame, csv_path in (
    (all_accounts_info_df, 'data/accounts.csv'),
    (all_accounts_posts_df, 'data/posts.csv'),
):
    frame.to_csv(csv_path, index=False)

In [21]:
# Attach the account-level columns to every post row. The left join on
# 'account_name' keeps each post row even if no account row matches it.
unified_df = pd.merge(
    all_accounts_posts_df,
    all_accounts_info_df,
    on='account_name',
    how='left',
)

In [24]:
# Save the merged posts-plus-account table as CSV, without the index column
unified_df.to_csv(path_or_buf='data/posts_with_account.csv', index=False)