In [1]:
import os
import dotenv
dotenv.load_dotenv()
from apify_client import ApifyClient
import pandas as pd

# Lift pandas' display limits so rendered DataFrames are never truncated
# by row or by column.
for display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(display_option, None)

In [2]:
# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------

# Project name; it is also the name of the output sub-folder under "results".
PROJECT = "market-signals"

# Page size forwarded to the scraper as its "resultsPerPage" input.
RESULTS_PER_PAGE = 1000

# Search queries, sent in this order.
# NOTE(review): entries such as "Winner(s)", "Six (6) figures" and
# "Breaks/Breakout" are passed verbatim as query text -- confirm that the
# parentheses/slash are intended rather than shorthand for several queries.
SEARCH_TERMS = [
    "Small Caps",
    "Stocks",
    "Shares",
    "Buy",
    "Sell",
    "Profitable trades",
    "Free trading alerts",
    "Free trading ideas",
    "Free trading education",
    "Tickers",
    "Trading floor",
    "Trader floor",
    "FinTwit",
    "Stupid money",
    "Get rich",
    "Bank",
    "Financial freedom",
    "F U money",
    "Right call",
    "Called it right",
    "Winner(s)",
    "Compound gains",
    "Six (6) figures",
    "Millionaire",
    "Million dollar(s)",
    "Prosper",
    "Cheap",
    "Chart",
    "Swing",
    "Tested level",
    "Monster",
    "Upside",
    "Downside",
    "Higher",
    "Lower",
    "Breaks/Breakout",
    "Blast off",
    "Pump",
    "Dump",
]

# ---------------------------------------------------------------------------
# Output location
# ---------------------------------------------------------------------------

# Make sure results/<PROJECT> exists; a no-op when it is already there.
folder_path = os.path.join("results", PROJECT)
os.makedirs(folder_path, exist_ok=True)

# Query Video Metadata based on Time

In [3]:
# Read the Apify API token from the environment and fail early with a clear
# message instead of handing an empty token to the client.
apify_token = os.getenv("APIFY_API")
if not apify_token:
    raise RuntimeError(
        "Environment variable APIFY_API is not set; "
        "define it (e.g. in a .env file) before running this cell."
    )

# Initialize the ApifyClient with your API token
client = ApifyClient(apify_token)

# Prepare the Actor input: one search per entry of SEARCH_TERMS, restricted to
# the "/video" search section, with every optional media download disabled.
run_input = {
    "excludePinnedPosts": False,
    "resultsPerPage": RESULTS_PER_PAGE,
    "searchQueries": SEARCH_TERMS,
    "searchSection": "/video",
    "shouldDownloadCovers": False,
    "shouldDownloadSlideshowImages": False,
    "shouldDownloadSubtitles": False,
    "shouldDownloadVideos": False
}

# Run the Actor and wait for it to finish
run = client.actor("OtzYfK1ndEGdwWFKQ").call(run_input=run_input)

# Later code reads run["defaultDatasetId"], so stop here when no run object
# came back rather than failing there with an opaque TypeError.
if run is None:
    raise RuntimeError("The Actor call returned no run object; there is no dataset to fetch.")

# A run that did not succeed may still have produced a partial dataset, so
# this is reported without aborting the notebook.
if run.get("status") != "SUCCEEDED":
    print(f"Warning: Actor run ended with status {run.get('status')!r}; results may be incomplete.")

In [4]:
# Fetch the items produced by the Actor run and stamp them with the
# extraction time.
new_video_metadata = pd.DataFrame(list(client.dataset(run["defaultDatasetId"]).iterate_items()))
# Timezone-aware UTC timestamp; Timestamp.utcnow() is deprecated in recent
# pandas releases and Timestamp.now(tz="UTC") is its replacement.
new_video_metadata["extractionTime"] = pd.Timestamp.now(tz="UTC")
if "id" in new_video_metadata.columns:
    # Keep ids as text so they compare equal to the ids reloaded from the CSV.
    new_video_metadata["id"] = new_video_metadata["id"].astype("str")

# Define the file path
video_metadata_path = f"results/{PROJECT}/video_metadata.csv"

if os.path.exists(video_metadata_path):
    # Load the existing file. The ids are read as text directly: letting
    # pandas infer a numeric dtype would turn the column into float64 as soon
    # as one id is missing, corrupting the long ids.
    old_video_metadata = pd.read_csv(video_metadata_path, dtype={"id": str})
    old_video_metadata["id"] = old_video_metadata["id"].astype("str")

    # Append new data. ignore_index avoids repeated index labels, which would
    # break any later index-based alignment.
    updated_video_metadata = pd.concat([old_video_metadata, new_video_metadata], ignore_index=True)

else:
    # First run: the new data is all there is.
    updated_video_metadata = new_video_metadata

# Remove duplicates, keeping the latest entry.
# Columns holding dicts or lists are unhashable and cannot serve as duplicate
# keys; extractionTime is excluded because it differs on every run.
dict_columns = [
    col for col in updated_video_metadata.columns
    if updated_video_metadata[col].apply(lambda x: isinstance(x, (dict, list))).any()
]
dedupe_columns = [
    col for col in updated_video_metadata.columns
    if col != "extractionTime" and col not in dict_columns
]
if dedupe_columns:
    # Not done in place, so new_video_metadata is left untouched when it is
    # the same object as updated_video_metadata (first run).
    updated_video_metadata = updated_video_metadata.drop_duplicates(subset=dedupe_columns, keep="last")
# Give the result a clean 0..n-1 index (drop_duplicates leaves gaps).
updated_video_metadata = updated_video_metadata.reset_index(drop=True)

# Save the combined data
updated_video_metadata.to_csv(video_metadata_path, index=False)

# Display the head of the DataFrame
print(updated_video_metadata.shape)
updated_video_metadata.head()

(2374, 24)


Unnamed: 0,id,text,createTime,createTimeISO,isAd,isMuted,authorMeta,musicMeta,webVideoUrl,mediaUrls,videoMeta,diggCount,shareCount,playCount,collectCount,commentCount,mentions,detailedMentions,hashtags,effectStickers,isSlideshow,isPinned,searchQuery,extractionTime
0,7447014898430643486,NEW PREVIEW ALERT!!! ‼️ 🚨 Calling all small ca...,1733893291,2024-12-11T05:01:31.000Z,False,False,"{'id': '7265539604013384747', 'name': 'dressed...","{'musicName': 'original sound', 'musicAuthor':...",https://www.tiktok.com/@dressedup.and.noh/vide...,[],"{'height': 1024, 'width': 576, 'duration': 14,...",28,2,483,3,0,[@Dejaco Hair],"[{'id': '6806740462070694918', 'name': 'dejaco...","[{'id': '1646214897155078', 'name': 'christmas...",[],False,False,Small Caps,2025-01-07 17:31:03.667539+00:00
1,7123323531785555201,What do you think of this stock? #stockmarket1...,1658528008,2022-07-22T22:13:28.000Z,False,False,"{'id': '7051116263040861186', 'name': 'getsett...","{'musicName': 'Aesthetic', 'musicAuthor': 'Tol...",https://www.tiktok.com/@getsett/video/71233235...,[],"{'height': 1024, 'width': 576, 'duration': 45,...",36,7,1925,10,2,[],[],"[{'id': '1625074159489030', 'name': 'stockmark...","[{'name': 'Green Screen', 'ID': '263840'}]",False,False,Small Caps,2025-01-07 17:31:03.667539+00:00
2,7413586135022046469,MINI CAP EXCLUSIVA 5950 DAY #newera #minicap ...,1726110043,2024-09-12T03:00:43.000Z,False,False,"{'id': '6894725696431195142', 'name': 'elchoko...","{'musicName': 'sonido original', 'musicAuthor'...",https://www.tiktok.com/@elchokocaps/video/7413...,[],"{'height': 1024, 'width': 576, 'duration': 93,...",70,2,1250,6,0,[],[],"[{'id': '40133', 'name': 'newera', 'title': ''...",[],False,False,Small Caps,2025-01-07 17:31:03.667539+00:00
3,6699523778034011398,my cap is too small since i graduated two year...,1559854434,2019-06-06T20:53:54.000Z,False,False,"{'id': '2851938', 'name': 'fw.indiaa', 'profil...","{'musicName': 'original sound', 'musicAuthor':...",https://www.tiktok.com/@fw.indiaa/video/669952...,[],"{'height': 720, 'width': 810, 'duration': 9, '...",28000,358,177500,347,108,[@iconicwill],"[{'id': '1434833', 'name': 'iconicwill', 'nick...","[{'id': '', 'name': ''}]",[],False,False,Small Caps,2025-01-07 17:31:03.667539+00:00
4,7281469942170537258,HARPER. Listing 9/22 Small cap pony wig!!! I s...,1695349344,2023-09-22T02:22:24.000Z,False,False,"{'id': '7265539604013384747', 'name': 'dressed...","{'musicName': 'September', 'musicAuthor': 'Ear...",https://www.tiktok.com/@dressedup.and.noh/vide...,[],"{'height': 1024, 'width': 576, 'duration': 16,...",41,1,1677,5,3,[],[],[],[],False,False,Small Caps,2025-01-07 17:31:03.667539+00:00


# Extract/Update Profile Information

In [5]:
# Pair every video's author record with the time that video row was
# extracted. The index is reset so the two pieces line up by position.
author_rows = updated_video_metadata[["authorMeta", "extractionTime"]].reset_index(drop=True)

# Only dict-valued authorMeta entries can be expanded into profile columns.
# Anything else (presumably rows reloaded from the CSV, where the value is
# text -- TODO confirm) carries no usable fields and is skipped.
has_author_dict = author_rows["authorMeta"].apply(lambda x: isinstance(x, dict))
author_rows = author_rows[has_author_dict].reset_index(drop=True)

# Convert the authorMeta dictionaries to separate columns. Both sides of the
# join share the same 0..n-1 index, so each profile gets the extractionTime of
# the row it came from.
new_profile_metadata = pd.json_normalize(author_rows["authorMeta"].tolist()).join(author_rows["extractionTime"])
new_profile_metadata = new_profile_metadata.rename(columns={"name": "profile"})
if "id" in new_profile_metadata.columns:
    new_profile_metadata["id"] = new_profile_metadata["id"].astype("str")
else:
    # No author record supplied an id (e.g. nothing to expand): mark the rows
    # with the same 'nan' text that the invalid-profile filter below removes.
    new_profile_metadata["id"] = "nan"

# Check if the file exists
profile_metadata_path = f"results/{PROJECT}/profile_metadata.csv"
if os.path.exists(profile_metadata_path):
    # Load the existing file, reading ids as text so they never pass through
    # a numeric dtype.
    prev_profile_metadata = pd.read_csv(profile_metadata_path, dtype={"id": str})
    prev_profile_metadata["id"] = prev_profile_metadata["id"].astype("str")

    # Append new data
    updated_profile_metadata = pd.concat([prev_profile_metadata, new_profile_metadata], ignore_index=True)

else:
    # First run: the new data is all there is.
    updated_profile_metadata = new_profile_metadata

# Remove duplicates, keeping the latest entry.
# Columns holding dicts or lists are unhashable and cannot serve as duplicate
# keys; extractionTime is excluded because it differs on every run.
dict_columns = [
    col for col in updated_profile_metadata.columns
    if updated_profile_metadata[col].apply(lambda x: isinstance(x, (dict, list))).any()
]
dedupe_columns = [
    col for col in updated_profile_metadata.columns
    if col != "extractionTime" and col not in dict_columns
]
if dedupe_columns:
    updated_profile_metadata = updated_profile_metadata.drop_duplicates(subset=dedupe_columns, keep="last")

# Drop invalid profiles (a missing id becomes the text 'nan' after the cast)
updated_profile_metadata = updated_profile_metadata[updated_profile_metadata["id"]!='nan'].reset_index(drop=True)

# Save profile metadata locally
updated_profile_metadata.to_csv(profile_metadata_path, index=False)

# Display the head of the DataFrame
print(updated_profile_metadata.shape)
updated_profile_metadata.head()

(2184, 17)


Unnamed: 0,id,profile,profileUrl,nickName,verified,signature,bioLink,originalAvatarUrl,avatar,privateAccount,following,fans,heart,video,digg,roomId,extractionTime
0,7051116263040861186,getsett,https://www.tiktok.com/@getsett,sett,False,sett | Watch. Learn. Invest\n💸bitesized videos...,,https://p16-sign-sg.tiktokcdn.com/aweme/720x72...,https://p16-sign-sg.tiktokcdn.com/aweme/720x72...,False,259,11500,42800,169,139,,2025-01-07 17:31:03.667539+00:00
1,6894725696431195142,elchokocaps,https://www.tiktok.com/@elchokocaps,EL CHOKO CAPS,False,🔥 NO OLVIDES SEGUIRNOS EN INSTAGRAM 🇲🇽🧢,,https://p16-sign-va.tiktokcdn.com/tos-maliva-a...,https://p16-sign-va.tiktokcdn.com/tos-maliva-a...,False,0,2967,42000,127,2354,,2025-01-07 17:31:03.667539+00:00
2,2851938,fw.indiaa,https://www.tiktok.com/@fw.indiaa,iNDiA🤪,True,Self Taught Dancer 💋\nGoal: 1M 👀\nIG: the0nlyi...,,https://p16-pu-sign-useast8.tiktokcdn-us.com/t...,https://p16-pu-sign-useast8.tiktokcdn-us.com/t...,False,876,969200,29400000,5920,298200,,2025-01-07 17:31:03.667539+00:00
3,7265539604013384747,dressedup.and.noh,https://www.tiktok.com/@dressedup.and.noh,dressedup.and.nohairtogo,False,✨I SHARE WHAT I LOVE 💕 \n✨NEW DROP PREVIEWS FO...,,https://p19-sign.tiktokcdn-us.com/tos-useast5-...,https://p19-sign.tiktokcdn-us.com/tos-useast5-...,False,177,1000,2245,130,670,,2025-01-07 17:31:03.667539+00:00
4,2691542,beautifullyyou.chantu,https://www.tiktok.com/@beautifullyyou.chantu,Chantu,False,Traveling Bridal Hairstylist \nI service all o...,,https://p16-pu-sign-useast8.tiktokcdn-us.com/t...,https://p16-pu-sign-useast8.tiktokcdn-us.com/t...,False,1610,12300,300100,385,31000,,2025-01-07 17:31:03.667539+00:00
