# Fetch user metadata and posts with the [TikAPI](https://tikapi.io/documentation/)

#### Load Python tools and Jupyter config

In [1]:
import os
import json
import requests
import pandas as pd
import jupyter_black
import altair as alt
from tqdm.notebook import tqdm
from tikapi import TikAPI, ValidationException, ResponseException

In [2]:
# Enable the jupyter_black extension for this notebook session
jupyter_black.load()

# Loosen pandas' display limits so wide frames and long post text are shown in full
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)  # None removes the column-width limit

In [3]:
# Read the TikAPI key from the environment so it is never stored in the notebook
mykey = os.environ.get("tikapikey")
if not mykey:
    # Fail here with a clear message; otherwise every later request fails and the
    # fetch helpers below only print the error and return empty results.
    raise RuntimeError('Set the "tikapikey" environment variable to your TikAPI key.')

api = TikAPI(mykey)

In [4]:
# Run date as a compact YYYYMMDD string
today = pd.Timestamp.now().strftime("%Y%m%d")

---

## Read data

#### Usernames for the API

In [5]:
# Usernames to look up; each one is passed to api.public.check(username=...)
users = ["lilyachty"]

#### Function to fetch metadata from a list of users

In [6]:
def fetch_user_meta(api, users):
    """Fetch profile metadata for each username via ``api.public.check``.

    Args:
        api: Client exposing ``public.check(username=...)`` whose result has a
            ``.json()`` method (a ``TikAPI`` instance in this notebook).
        users: Iterable of usernames to look up.

    Returns:
        list[dict]: One flat record per user that was fetched and parsed
        successfully. A user whose lookup or parsing fails is reported with
        ``print`` and skipped, so the result can be shorter than ``users``.
    """
    user_meta = []
    for u in tqdm(users):
        try:
            response = api.public.check(username=u).json()
            user_info = response["userInfo"]["user"]
            stats = response["userInfo"]["stats"]

            # Treat a missing or null bioLink as "no link" instead of failing
            # the whole user on the chained .get() call.
            bio_link = user_info.get("bioLink") or {}

            user_dict = {
                "id": user_info["id"],
                "name": user_info["nickname"],
                "followers": stats["followerCount"],
                "following": stats["followingCount"],
                "friend_count": stats["friendCount"],
                "heart_count": stats["heartCount"],
                "videos_posted": stats["videoCount"],
                "bio": user_info["signature"],
                "link": bio_link.get("link", ""),
                "desc": response["shareMeta"]["desc"],
                "sec_id": user_info["secUid"],
            }

            user_meta.append(user_dict)
        except KeyError as e:
            # A required field was absent from the response payload
            print(f"Error processing user {u}: Missing key {e}")
        except Exception as e:
            # Best effort: report the failure and continue with the next user
            print(f"Unexpected error processing user {u}: {e}")

    return user_meta

#### Fetch data

In [7]:
# fetch_user_meta prints and skips users that fail, so this list may be shorter than `users`
user_data = fetch_user_meta(api, users)

  0%|          | 0/1 [00:00<?, ?it/s]

#### Into a dataframe

In [8]:
# One row per successfully fetched user; columns come from the keys of each record dict
df = pd.DataFrame(user_data)

In [9]:
# Quick look at the metadata frame before using its sec_id column
df.head()

Unnamed: 0,id,name,followers,following,friend_count,heart_count,videos_posted,bio,link,desc,sec_id
0,6569595380449902597,lilyachty,9400000,95,78,233400000,281,I love girls with freckled faces,,"@lilyachty 9.4m Followers, 95 Following, 233.4m Likes - Watch awesome short videos created by lilyachty",MS4wLjABAAAAsHntXC3s0AvxcecggxsoVa4eAiT8OVafVZ4OQXxy-9htpnUi0sOYSr0kGGD1Loud


---

## Posts

#### Function to fetch a user's posts

In [10]:
def fetch_user_posts(sec_uid):
    """Collect one account's posts into a DataFrame.

    Pages through ``api.public.posts`` (using the module-level ``api`` client)
    for the given ``secUid`` and flattens every entry of ``itemList`` into a row.

    Args:
        sec_uid: The account's ``secUid`` as passed to ``api.public.posts``.

    Returns:
        pandas.DataFrame: One row per post collected. If any exception is
        raised while fetching or parsing, it is printed and the rows gathered
        up to that point are returned (possibly an empty frame).
    """
    rows = []

    try:
        page = api.public.posts(secUid=sec_uid)
        while page:
            payload = page.json()

            for post in payload.get("itemList", []):
                counts = post.get("stats", {})
                author = post["author"]
                rows.append(
                    {
                        "user_id": author["id"],
                        "user_name": author["nickname"],
                        "user_unique_id": author["uniqueId"],
                        "post_desc": post["desc"],
                        "create_time": post["createTime"],
                        "post_id": post["video"]["id"],
                        # Duration falls back to the string "N/A" when absent
                        "duration": post["video"].get("duration", "N/A"),
                        # Missing engagement counters are recorded as 0
                        "bookmark_count": counts.get("collectCount", 0),
                        "comment_count": counts.get("commentCount", 0),
                        "heart_count": counts.get("diggCount", 0),
                        "share_count": counts.get("shareCount", 0),
                        "play_count": counts.get("playCount", 0),
                    }
                )

            # Request another page only while the payload reports more items
            if payload.get("hasMore", False):
                page = page.next_items()
            else:
                break

    except Exception as err:
        print(f"An error occurred: {err}")

    return pd.DataFrame(rows)

#### Define IDs for users

In [17]:
# "sec_id" holds each user's secUid (set in fetch_user_meta); fetch_user_posts takes it as secUid
ids = df["sec_id"].to_list()

#### Get a list of dataframes for each user's posts

In [18]:
# One posts DataFrame per secUid, in the same order as `ids`
dfs = [fetch_user_posts(sec_uid) for sec_uid in ids]

#### Clean up the dates. All datetimes are converted to US Eastern time.

In [19]:
# Stack the per-user frames; ignore_index gives every row a unique label instead of
# repeating each user's own 0..n-1 index when several users are fetched.
df_posts = pd.concat(dfs, ignore_index=True)

In [20]:
# create_time is epoch seconds: parse it as UTC, then shift to New York wall-clock time
df_posts["create_time_est"] = pd.to_datetime(
    df_posts["create_time"], unit="s", utc=True
).dt.tz_convert("America/New_York")

In [21]:
# create_time_est is already a tz-aware datetime column, so split it with the .dt
# accessor directly rather than re-parsing it (unit="s" has no meaning for datetimes).
df_posts["created_date"] = df_posts["create_time_est"].dt.date
df_posts["created_time"] = df_posts["create_time_est"].dt.time
# Replace the raw epoch seconds with the localized timestamps (dropped in the following cell)
df_posts["create_time"] = df_posts["create_time_est"]

In [22]:
# create_time_est, created_date and created_time carry the timestamp from here on
df_posts = df_posts.drop("create_time", axis=1)

In [23]:
# Peek at the last rows to confirm the new date/time columns look right
df_posts.tail()

Unnamed: 0,user_id,user_name,user_unique_id,post_desc,post_id,duration,bookmark_count,comment_count,heart_count,share_count,play_count,create_time_est,created_date,created_time
253,6569595380449902597,lilyachty,lilyachty,LOUIS V JENGA SET #fyp #jenga #games #louisvuitton,6830129158585191686,5,3930,986,278600,1886,2100000,2020-05-23 15:49:30-04:00,2020-05-23,15:49:30
254,6569595380449902597,lilyachty,lilyachty,I wonder what my dog really thinks when I leave him outside... #fyp #funny #dogs #doglover #roof #cold #jokes,6829967127970139398,10,12600,2549,289300,12500,2000000,2020-05-23 05:20:47-04:00,2020-05-23,05:20:47
255,6569595380449902597,lilyachty,lilyachty,I’ll never play every game!!! #fyp #videogames #arcade #gamecube #nintendo #lilyachty #lilboat,6829951891472977158,15,8024,2346,415100,2593,2900000,2020-05-23 04:21:50-04:00,2020-05-23,04:21:50
256,6569595380449902597,lilyachty,lilyachty,That’s just not right.. 🤢. #fyp #vegies #icecream,6829949351557713157,12,2239,1027,203900,1386,2100000,2020-05-23 04:11:49-04:00,2020-05-23,04:11:49
257,6569595380449902597,lilyachty,lilyachty,Goaaaaaaaals #fyp #dreamcar,6829906232346823941,8,20700,6299,1400000,7850,10000000,2020-05-23 01:24:29-04:00,2020-05-23,01:24:29


---

## Exports

#### Output JSON and CSV for each user

In [24]:
# Make sure the export folder exists so the writes do not fail on a fresh checkout
export_dir = "data/processed"
os.makedirs(export_dir, exist_ok=True)

for user_to_export in df_posts["user_unique_id"].unique():
    # Filter once with a boolean mask; building a query string from the username
    # breaks on names containing quotes and repeated the same filter twice.
    user_posts = df_posts[df_posts["user_unique_id"] == user_to_export]

    user_posts.to_json(
        f"{export_dir}/{user_to_export}_all_posts.json",
        indent=4,
        orient="records",
    )

    user_posts.to_csv(f"{export_dir}/{user_to_export}_all_posts.csv", index=False)