# Fetch user metadata and posts with the [TikAPI](https://tikapi.io/documentation/)

#### Load Python tools and Jupyter config

In [1]:
import os
import json
import requests
import pandas as pd
import jupyter_black
import altair as alt
from tqdm.notebook import tqdm
from tikapi import TikAPI, ValidationException, ResponseException

In [2]:
# Activate the jupyter_black extension for this notebook session.
jupyter_black.load()

# Raise pandas' display limits so wide frames and long text are not truncated.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)

In [3]:
# The API key is read from the `tikapikey` environment variable; the lookup
# yields None when the variable is not set.
mykey = os.getenv("tikapikey")
api = TikAPI(mykey)

In [4]:
# Date stamp (YYYYMMDD) taken from the local clock when this cell runs.
_run_started = pd.Timestamp.today()
today = _run_started.strftime("%Y%m%d")

---

## Read data

#### Usernames for the API

In [5]:
# Usernames whose profile metadata will be requested from the API.
users = ["kamalahq"]

#### Function to fetch metadata from a list of users

In [6]:
def fetch_user_meta(api, users):
    """Collect profile metadata for each username.

    Args:
        api: Client object; ``api.public.check(username=...)`` is called for
            every user and ``.json()`` is called on its result.
        users: Iterable of usernames to look up.

    Returns:
        A list of dicts, one per user that was processed successfully. A user
        whose lookup or parsing fails is reported with ``print`` and skipped,
        so the list may be shorter than ``users``.
    """
    user_meta = []
    for u in tqdm(users):
        try:
            response = api.public.check(username=u).json()
            user_info = response["userInfo"]["user"]
            stats = response["userInfo"]["stats"]

            # Each key appears exactly once; a repeated key in a dict literal
            # silently discards the earlier entry.
            user_dict = {
                "id": user_info["id"],
                "name": user_info["nickname"],
                "followers": stats["followerCount"],
                "following": stats["followingCount"],
                "friend_count": stats["friendCount"],
                "heart_count": stats["heartCount"],
                "videos_posted": stats["videoCount"],
                "bio": user_info["signature"],
                # `bioLink` is optional in the payload; fall back to "".
                "link": user_info.get("bioLink", {}).get("link", ""),
                "desc": response["shareMeta"]["desc"],
                "sec_id": user_info["secUid"],
            }

            user_meta.append(user_dict)
        except KeyError as e:
            # A required field is absent from the response for this user.
            print(f"Error processing user {u}: Missing key {e}")
        except Exception as e:
            # Best effort: report and continue with the remaining users.
            print(f"Unexpected error processing user {u}: {e}")

    return user_meta

#### Fetch data

In [7]:
# Look up metadata for every entry in `users` using the `api` client.
user_data = fetch_user_meta(api, users)

  0%|          | 0/1 [00:00<?, ?it/s]

#### Into a dataframe

In [8]:
# Tabulate the metadata records returned by fetch_user_meta.
df = pd.DataFrame(user_data)

In [9]:
# Preview the first rows of the user metadata frame.
df.head()

Unnamed: 0,id,name,followers,following,friend_count,heart_count,videos_posted,bio,link,desc,sec_id
0,7334124418954200106,Kamala HQ,3200000,8,3,70600000,410,Providing context.,,"@kamalahq 3.2m Followers, 8 Following, 70.6m Likes - Watch awesome short videos created by Kamala HQ",MS4wLjABAAAAdS-2IIPTkxR5BwnI-1NIh-qVHKIPoopksQb8Hk3pUYo-1ksnG3DHC4pIIprVdH1f


---

## Posts

#### Function to fetch a user's posts

In [10]:
def fetch_user_posts(sec_uid, client=None):
    """Fetch every available post for one account, following pagination.

    Args:
        sec_uid: Value passed as ``secUid`` to ``client.public.posts``.
        client: Optional API client. When omitted, the notebook-level ``api``
            object is used, so existing one-argument calls behave as before.

    Returns:
        A ``pandas.DataFrame`` with one row per post. If an error occurs while
        fetching or parsing, it is reported with ``print`` and the rows
        gathered up to that point are returned (possibly an empty frame).
    """
    all_posts = []

    try:
        # Resolve the fallback inside the `try` so a missing global client is
        # reported the same way as any other failure.
        if client is None:
            client = api

        response = client.public.posts(secUid=sec_uid)
        while response:
            posts_data = response.json()
            for item in posts_data.get("itemList", []):
                # `stats` may be absent; counts then default to 0.
                stats = item.get("stats", {})
                post_info = {
                    "user_id": item["author"]["id"],
                    "user_name": item["author"]["nickname"],
                    "user_unique_id": item["author"]["uniqueId"],
                    "post_desc": item["desc"],
                    "create_time": item["createTime"],
                    "post_id": item["video"]["id"],
                    "duration": item["video"].get("duration", "N/A"),
                    "bookmark_count": stats.get("collectCount", 0),
                    "comment_count": stats.get("commentCount", 0),
                    "heart_count": stats.get("diggCount", 0),
                    "share_count": stats.get("shareCount", 0),
                    "play_count": stats.get("playCount", 0),
                }
                all_posts.append(post_info)

            # Stop once the payload no longer advertises further pages.
            if not posts_data.get("hasMore", False):
                break

            # Ask the response object for the following page; a falsy result
            # ends the loop.
            response = response.next_items()

    except Exception as e:
        # Best effort: keep whatever was collected before the failure.
        print(f"An error occurred: {e}")

    return pd.DataFrame(all_posts)

#### Define IDs for users

In [11]:
# Pull the `sec_id` column out of the metadata frame as a plain list.
ids = df["sec_id"].tolist()

#### Get a list of dataframes for each user's posts

In [12]:
# One posts frame per id, in the same order as `ids`.
dfs = [fetch_user_posts(sec_id) for sec_id in ids]

#### Clean up the dates. All datetimes are converted to US Eastern time.

In [13]:
# Stack the per-user post frames into a single frame.
df_posts = pd.concat(dfs)

In [14]:
# Read `create_time` as seconds since the epoch in UTC, then express the
# resulting timestamps in the America/New_York zone.
_created_utc = pd.to_datetime(df_posts["create_time"], unit="s", utc=True)
df_posts["create_time_est"] = _created_utc.dt.tz_convert("America/New_York")

In [15]:
# `create_time_est` already holds tz-aware timestamps, so the calendar date and
# the time of day are read straight from the `.dt` accessor. Re-parsing the
# column with `pd.to_datetime(..., unit="s")` is unnecessary, and `unit` has no
# meaning for values that are already datetimes.
_eastern = df_posts["create_time_est"]
df_posts["created_date"] = _eastern.dt.date
df_posts["created_time"] = _eastern.dt.time
# Overwrite the raw epoch values with the converted timestamps.
df_posts["create_time"] = _eastern

In [16]:
# Remove the `create_time` column; the derived date/time columns remain.
df_posts = df_posts.drop(columns=["create_time"])

In [17]:
# Preview the last rows of the posts frame.
df_posts.tail()

Unnamed: 0,user_id,user_name,user_unique_id,post_desc,post_id,duration,bookmark_count,comment_count,heart_count,share_count,play_count,create_time_est,created_date,created_time
395,7334124418954200106,Kamala HQ,kamalahq,oof.,7335168622681148715,11,195,421,4903,81,200600,2024-02-13 14:21:13-05:00,2024-02-13,14:21:13
396,7334124418954200106,Kamala HQ,kamalahq,He really said that…,7334867980217519402,20,403,1554,7872,158,588700,2024-02-12 18:54:16-05:00,2024-02-12,18:54:16
397,7334124418954200106,Kamala HQ,kamalahq,lol,7334817814542437675,15,832,979,18100,432,470400,2024-02-12 15:39:51-05:00,2024-02-12,15:39:51
398,7334124418954200106,Kamala HQ,kamalahq,weird brag,7334760317244771627,9,469,982,10800,206,457800,2024-02-12 11:56:52-05:00,2024-02-12,11:56:52
399,7334124418954200106,Kamala HQ,kamalahq,lol hey guys,7334529963066019114,26,39900,7151,891400,33100,10900000,2024-02-11 21:02:37-05:00,2024-02-11,21:02:37


---

## Exports

#### Output JSON and CSV for each user

In [18]:
# Write one JSON file and one CSV file per account under data/processed/.
# The pandas writers do not create missing folders, so make sure it exists.
os.makedirs("data/processed", exist_ok=True)

for user_to_export in df_posts["user_unique_id"].unique():
    # Select this account's rows once with a boolean mask. Formatting the
    # username into a query string would break on names containing a quote.
    user_posts = df_posts[df_posts["user_unique_id"] == user_to_export]

    user_posts.to_json(
        f"data/processed/{user_to_export}_all_posts.json",
        indent=4,
        orient="records",
    )

    user_posts.to_csv(
        f"data/processed/{user_to_export}_all_posts.csv", index=False
    )