In [2]:
import pandas as pd
import json


In [3]:
#1.1 Function for parsing json and getting dataframe
def parse_creator_data(creator_data):
    """Split one raw creator record into its four sections.

    Args:
        creator_data: Mapping that provides the keys "user_profile",
            "original_post", "social_media" and "posts_info".

    Returns:
        dict with the same four keys, in that order. The first three values
        are passed through unchanged; "posts_info" is wrapped in a pandas
        DataFrame.

    Raises:
        KeyError: If one of the four keys is missing from ``creator_data``.
    """
    passthrough_keys = ("user_profile", "original_post", "social_media")
    parsed = {key: creator_data[key] for key in passthrough_keys}
    # Posts become a DataFrame so they can be filtered and aggregated later.
    parsed["posts_info"] = pd.DataFrame(creator_data["posts_info"])
    return parsed
def parse_data(data_file):
    """Load a JSON file of creator records and parse each record.

    Args:
        data_file: Path to a JSON file whose top-level value is an iterable
            of creator records.

    Returns:
        list with one parse_creator_data() result per record, in file order.
    """
    # JSON text is UTF-8 by specification. Without an explicit encoding,
    # open() falls back to the platform default, which can fail or mis-decode
    # non-ASCII content such as emoji in post descriptions.
    with open(data_file, "r", encoding="utf-8") as file:
        raw_creators = json.load(file)
    return [parse_creator_data(creator) for creator in raw_creators]

In [4]:
# Load and parse every creator record. Despite its name, `df` is a list of
# per-creator dicts (the value parse_data returns), not a DataFrame.
df = parse_data("user_info.json")

In [83]:
# Show the last few post descriptions of the first creator.
# NOTE(review): the saved output below has a single row (index 1), so
# posts_info was likely already filtered when this cell ran — re-run from a
# fresh kernel to confirm.
df[0]["posts_info"]["description"].tail()

1    Replying to @Samantha Irvin here are some Chri...
Name: description, dtype: object

In [16]:
# 1.3 Function for filtering data by follower count and returning usernames
def filter_by_follower_count(data, min_followers, max_followers):
    """Return usernames of creators whose follower count is within a range.

    Args:
        data: Iterable of creator dicts; each needs
            creator["user_profile"]["tiktok_followers"] and
            creator["user_profile"]["username"].
        min_followers: Lower bound (inclusive).
        max_followers: Upper bound (inclusive).

    Returns:
        list of usernames, in the order the creators appear in ``data``.
    """
    usernames = []
    for creator in data:
        profile = creator["user_profile"]
        if min_followers <= profile["tiktok_followers"] <= max_followers:
            usernames.append(profile["username"])
    return usernames

In [19]:
#1.4 Function for filtering data by post count and returning usernames
def filter_by_post_count(data, min_posts, max_posts):
    """Return usernames of creators whose number of posts is within a range.

    Args:
        data: Iterable of creator dicts; "posts_info" must expose ``.shape``
            (e.g. a DataFrame).
        min_posts: Lower bound on the row count (inclusive).
        max_posts: Upper bound on the row count (inclusive).

    Returns:
        list of usernames, in the order the creators appear in ``data``.
    """
    matching = []
    for creator in data:
        post_count = creator["posts_info"].shape[0]
        if min_posts <= post_count <= max_posts:
            matching.append(creator["user_profile"]["username"])
    return matching

In [28]:
#1.5 Filter by average video views and return usernames
def filter_by_average_video_views(data, min_views, max_views):
    """Return usernames of creators whose mean post views are within a range.

    Args:
        data: Iterable of creator dicts; "posts_info" must have a "views"
            column.
        min_views: Lower bound on the mean (inclusive).
        max_views: Upper bound on the mean (inclusive).

    Returns:
        list of usernames, in the order the creators appear in ``data``.
        A NaN mean never satisfies the comparison, so such creators are
        left out.
    """
    selected = []
    for creator in data:
        mean_views = creator["posts_info"]["views"].mean()
        if min_views <= mean_views <= max_views:
            selected.append(creator["user_profile"]["username"])
    return selected

In [11]:
#1.2 Function for getting the number of posts for each creator
def get_num_posts(data):
    """Count the posts of every creator.

    Args:
        data: Iterable of creator dicts with a sized "posts_info" value.

    Returns:
        list of ints, one per creator, in input order.
    """
    counts = []
    for creator in data:
        counts.append(len(creator["posts_info"]))
    return counts

In [24]:
def filter_creators(creators, min_followers=None, max_followers=None, keyword=None):
    """Select creators by follower count and by keyword in their posts.

    Args:
        creators: Iterable of creator dicts with "user_profile" and
            "posts_info" entries.
        min_followers: Optional inclusive lower bound on
            user_profile["tiktok_followers"]; None disables the bound.
        max_followers: Optional inclusive upper bound; None disables it.
        keyword: Optional keyword passed to filter_posts(); falsy keeps
            every post.

    Returns:
        list of new creator dicts whose "posts_info" holds only the matching
        posts. Creators outside the follower range, or with no matching
        posts, are omitted. The input dicts are never modified.
    """
    filtered_creators = []
    for creator in creators:
        # The follower bounds used to be accepted but ignored; apply them
        # before the more expensive post filtering.
        if min_followers is not None or max_followers is not None:
            followers = creator["user_profile"]["tiktok_followers"]
            if min_followers is not None and followers < min_followers:
                continue
            if max_followers is not None and followers > max_followers:
                continue

        filtered_posts = filter_posts(creator["posts_info"], keyword)
        if filtered_posts.empty:
            continue  # Skip creators with no matching posts

        # Build a shallow copy instead of assigning into `creator`, so the
        # caller's data keeps its full set of posts for later queries.
        filtered_creators.append({**creator, "posts_info": filtered_posts})
    return filtered_creators

def filter_posts(posts_df, keyword=None):
    """Return the posts whose description contains ``keyword``.

    Args:
        posts_df: DataFrame of posts with a "description" column.
        keyword: Literal text to look for, matched case-insensitively.
            A falsy value (None or "") disables filtering.

    Returns:
        A new DataFrame: a copy of all posts when no keyword is given,
        otherwise only the rows whose description contains the keyword.
    """
    if not keyword:
        return posts_df.copy()  # Return all posts if no keyword provided

    # regex=False: treat the keyword as plain text, so input such as "c++"
    #   or "(" neither raises nor is interpreted as a pattern.
    # na=False: rows with a missing description count as "no match" instead
    #   of putting NaN in the mask, which boolean indexing rejects.
    matches = posts_df["description"].str.contains(
        keyword, case=False, regex=False, na=False
    )

    # Other fields (e.g. the creator's profile) could be matched here too if
    # that is ever needed.

    return posts_df[matches].copy()

In [21]:
# Collect the username of every creator in the loaded data, in file order.
usernames = [creator["user_profile"]["username"] for creator in df]

In [34]:
# Function for filtering based on keyword present in the bio
def filter_by_bio_keyword(data, keyword, case=True):
    """Return usernames of creators whose TikTok bio contains ``keyword``.

    Args:
        data: Iterable of creator dicts; each needs
            creator["user_profile"]["tiktok_bio"] and
            creator["user_profile"]["username"].
        keyword: Substring to look for in the bio.
        case: When True (the default) the match is case-sensitive; pass
            False for a case-insensitive match.

    Returns:
        list of usernames, in the order the creators appear in ``data``.
        Creators whose bio is not a string (e.g. None) never match.
    """
    needle = keyword if case else keyword.casefold()
    usernames = []
    for creator in data:
        profile = creator["user_profile"]
        bio = profile["tiktok_bio"]
        # A null bio would make the `in` test raise TypeError; treat it as
        # "no match" instead.
        if not isinstance(bio, str):
            continue
        haystack = bio if case else bio.casefold()
        if needle in haystack:
            usernames.append(profile["username"])
    return usernames

In [85]:
# Example: usernames of creators whose bio contains the substring "news".
filter_by_bio_keyword(df, "news")

['lucindapikkat', 'whoisthecookbook', 'thesolesupplier', 'loopedlaces']

In [91]:
#create a function to filter by keyword in the description of the posts
def filter_by_post_keyword(data, keyword):
    """Return the post urls of creators with ``keyword`` in a description.

    Args:
        data: Iterable of creator dicts whose "posts_info" has
            "description" and "url" columns.
        keyword: Substring to look for (case-sensitive) in the description
            of each post.

    Returns:
        list with one "url" Series per matching creator, in input order.
        The Series holds every url of that creator, not only the urls of
        the posts that matched.
    """
    matching_urls = []
    for creator in data:
        posts = creator["posts_info"]
        # Test the actual description values. Searching the rendered
        # Series.to_string() text also matches index labels and "NaN", and
        # misses keywords cut off by the display.max_colwidth truncation.
        has_keyword = any(
            isinstance(text, str) and keyword in text
            for text in posts["description"]
        )
        if has_keyword:
            matching_urls.append(posts["url"])
    return matching_urls

In [92]:
# Example: one url Series per creator that has "spend" in its post descriptions.
filter_by_post_keyword(df, "spend")

[0    https://www.tiktok.com/@jaysnrks/video/7294422...
 1    https://www.tiktok.com/@jaysnrks/video/7292262...
 2    https://www.tiktok.com/@jaysnrks/video/7309994...
 3    https://www.tiktok.com/@jaysnrks/video/7309643...
 4    https://www.tiktok.com/@jaysnrks/video/7309633...
 5    https://www.tiktok.com/@jaysnrks/video/7309224...
 6    https://www.tiktok.com/@jaysnrks/video/7308209...
 Name: url, dtype: object]

In [59]:
#1.6 Function for getting the average number of views of the creator with the given username
def get_average_views(data, username):
    """Return the mean post views of the creator named ``username``.

    Args:
        data: Iterable of creator dicts with
            creator["user_profile"]["username"] and a "views" column in
            creator["posts_info"].
        username: Username to look up; the first matching creator wins.

    Returns:
        The mean of that creator's "views" column, or None when no creator
        has the given username.
    """
    match = next(
        (creator for creator in data
         if creator["user_profile"]["username"] == username),
        None,
    )
    if match is None:
        return None
    return match["posts_info"]["views"].mean()



In [60]:
# Example: mean views across the posts of a single creator.
get_average_views(df, "lucindapikkat")

77593.25

In [63]:
# Filter creators by the like counts of their posts and return the matching usernames
def filter_by_likes(data, min_likes, max_likes):
    """Return usernames of creators with at least one post in a like range.

    Args:
        data: Iterable of creator dicts whose "posts_info" has a "likes"
            column.
        min_likes: Lower bound on a post's likes (inclusive).
        max_likes: Upper bound on a post's likes (inclusive).

    Returns:
        list of usernames, in the order the creators appear in ``data``.
        ``data`` itself is left untouched.
    """
    filtered_creators = []
    for creator in data:
        likes = creator["posts_info"]["likes"]
        in_range = (likes >= min_likes) & (likes <= max_likes)
        # Only test for a match. Writing the filtered frame back into
        # `creator` would permanently drop posts from the caller's data and
        # make every later query run on the reduced set.
        if in_range.any():
            filtered_creators.append(creator["user_profile"]["username"])
    return filtered_creators
