# User History Time Series

This notebook creates a time series from the video watching history in TikTok’s user_data.json file, which contains timestamps of when videos were watched. It produces a single time series plot covering all users (each shown in a different color) that shows their usage of TikTok over time, subset for 10/7 - 12/7.

**Authors: Audrey Yip & Jenni**

In [1]:
import os
import pandas as pd
import plotly.express as px

In [2]:
# Locate the per-user JSON exports (newest version of the data) and
# prepare the list that will collect one DataFrame per file.
json_folder = "../pre-processing/url-json-oct7"
dfs = []
json_files = list(
    filter(lambda name: name.endswith('.json'), os.listdir(json_folder))
)
print(json_files)

['Sec2Gr3_77217.json', 'Sec2Gr3_99568.json', 'Sec2Gr3_18853.json', 'Sec2Gr3_20688.json', 'Sec2Gr3_74721.json']


In [3]:
# Read every per-user JSON file into a DataFrame, tag its rows with the
# user it came from, and stack everything into a single frame (merged_df).
import json  # imported in this cell: needed to inspect the raw JSON structure

# Start from an empty list so that re-running this cell does not append
# every file a second time (which would double the counts downstream).
dfs = []

for file in json_files:
    filepath = os.path.join(json_folder, file)

    # Parse with the json module instead of pd.read_json: read_json always
    # returns a pandas object, never a dict, so the 'VideoList' check below
    # could never succeed on its result.
    with open(filepath, encoding='utf-8') as handle:
        json_data = json.load(handle)

    if isinstance(json_data, dict) and 'VideoList' in json_data:
        # Records are wrapped under a top-level 'VideoList' key
        df = pd.json_normalize(json_data['VideoList'])
    else:
        # Any other layout is handed to pandas as-is
        # (presumably a bare list of records -- confirm against the data)
        df = pd.DataFrame(json_data)

    # The user label is the file name without its extension; splitext keeps
    # any additional dots that are part of the name itself.
    user = os.path.splitext(file)[0]
    df['User'] = user
    dfs.append(df)

merged_df = pd.concat(dfs, ignore_index=True)


In [4]:
# Parse the 'Date' column into datetimes so it can be binned by calendar day
merged_df['Date'] = pd.to_datetime(merged_df['Date'])

# One row per (User, day) holding the number of entries in that bin
daily_bins = pd.Grouper(key='Date', freq='D')
per_user_daily = merged_df.groupby(['User', daily_bins])
count_df = per_user_daily.size().reset_index(name='Count')

In [5]:
# Area chart of the daily counts, with one colored series per user
axis_labels = {'Date': 'Date', 'Count': 'Usage Count', 'User': 'User'}
fig = px.area(
    count_df,
    x='Date',
    y='Count',
    color='User',
    title='TikTok Usage Over Time by User',
    labels=axis_labels,
)

# Grid lines on both axes; the built-in y-axis title is blanked because the
# annotation added below is used as the axis label instead.
fig.update_xaxes(showgrid=True)
fig.update_yaxes(title_text="", showgrid=True)
fig.update_layout(height=600, width=1000, title_x=0.5)

# Rotated "Usage Count" label placed left of the plot, in paper coordinates
y_axis_label = dict(
    text="Usage Count",
    xref="paper",
    yref="paper",
    x=-0.1,
    y=0.5,
    textangle=-90,
    showarrow=False,
    font=dict(size=16),
)
fig.add_annotation(**y_axis_label)
fig.show()

In [40]:
# Optional: export the figure to an HTML file
html_output_path = "tiktok_usage_visualization.html"
fig.write_html(html_output_path)