In [1]:
import os
import dotenv
dotenv.load_dotenv()
from apify_client import ApifyClient
import pandas as pd

# Lift pandas' display truncation so full frames are rendered in cell output.
for display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(display_option, None)

In [2]:
# --- Output location ---------------------------------------------------------
# Every artefact of this notebook is written below results/<PROJECT>/.
PROJECT = "market-signals"
folder_path = os.path.join("results", PROJECT)
os.makedirs(folder_path, exist_ok=True)  # no-op when the folder already exists

# --- Search parameters -------------------------------------------------------
# Sent to the Actor as "resultsPerPage".
RESULTS_PER_PAGE = 10

# Sent to the Actor as "searchQueries"; one entry per query string.
SEARCH_TERMS = [
    # instruments and actions
    "Small Caps",
    "Stocks",
    "Shares",
    "Buy",
    "Sell",
    # alerts, ideas and communities
    "Profitable trades",
    "Free trading alerts",
    "Free trading ideas",
    "Free trading education",
    "Tickers",
    "Trading floor",
    "Trader floor",
    "FinTwit",
    # wealth and success language
    "Stupid money",
    "Get rich",
    "Bank",
    "Financial freedom",
    "F U money",
    "Right call",
    "Called it right",
    "Winner(s)",
    "Compound gains",
    "Six (6) figures",
    "Millionaire",
    "Million dollar(s)",
    "Prosper",
    # price and chart language
    "Cheap",
    "Chart",
    "Swing",
    "Tested level",
    "Monster",
    "Upside",
    "Downside",
    "Higher",
    "Lower",
    "Breaks/Breakout",
    "Blast off",
    "Pump",
    "Dump",
]
# PROFILES = [
#     "aswathdamodaranofficial",
#     "eddyelfenbein",
#     "downtownjbrown",
#     "madymills",
#     "humphreytalks",
#     "sumitsinvestmenttakes",
#     "calltoleap",
#     "alexisanddean",
#     "taylormitchell.io",
#     "joyeeyang0",
#     "austinhankwitz",
#     "pensioncraft",
#     "gabriel.nussbaum",
#     "stocksandsavings",
#     "msfx_trading",
#     "shiftingshares",
#     "james_shack",
#     "brianferoldi",
#     "liz.ann.sonders",
#     "fluentinfinance",
#     "grahamstephan",
#     "investmattallen",
#     "lti.finance",
#     "petermallouk",
#     "emmet_savage",
#     "andrewbeebe"
# ]

# Query Video Metadata by Search Term

In [3]:
# Initialize the ApifyClient with the API token read from the environment
# (variable APIFY_API). Fail early with a clear message when the token is
# missing instead of handing None to the client and failing later on the call.
apify_token = os.getenv("APIFY_API")
if not apify_token:
    raise RuntimeError(
        "Environment variable APIFY_API is not set; "
        "define it (e.g. in the .env file) before running this cell."
    )
client = ApifyClient(apify_token)

# Alternative Actor input for scraping by profile instead of by search term.
# NOTE: enabling it requires PROFILES and POST_DATE_OLDEST to be defined first;
# POST_DATE_OLDEST is not defined anywhere in the visible notebook.
# run_input = {
#     "excludePinnedPosts": False,
#     "oldestPostDate": POST_DATE_OLDEST,
#     "profileScrapeSections": [
#         "videos"
#     ],
#     "resultsPerPage": 100,
#     "profileSorting": "latest",
#     "profiles": PROFILES,
#     "shouldDownloadCovers": False,
#     "shouldDownloadSlideshowImages": False,
#     "shouldDownloadSubtitles": False,
#     "shouldDownloadVideos": False
# }

# Actor input: search the video section for every term in SEARCH_TERMS and
# request metadata only (all media downloads are switched off).
run_input = {
    "excludePinnedPosts": False,
    "resultsPerPage": RESULTS_PER_PAGE,
    "searchQueries": SEARCH_TERMS,
    "searchSection": "/video",
    "shouldDownloadCovers": False,
    "shouldDownloadSlideshowImages": False,
    "shouldDownloadSubtitles": False,
    "shouldDownloadVideos": False
}

# Run the Actor and wait for it to finish
run = client.actor("OtzYfK1ndEGdwWFKQ").call(run_input=run_input)

# The following cell reads run["defaultDatasetId"]; stop here with a readable
# error if no run object came back.
# NOTE(review): call() appears to be typed as possibly returning None - confirm
# against the installed apify-client version.
if run is None:
    raise RuntimeError("The Apify Actor call returned no run object; nothing to fetch.")

In [5]:
# Fetch the items produced by the Actor run and stamp them with the time of
# this extraction (timezone-aware UTC).
new_video_metadata = pd.DataFrame(list(client.dataset(run["defaultDatasetId"]).iterate_items()))
# Timestamp.now(tz="UTC") replaces Timestamp.utcnow(), which is deprecated in
# recent pandas releases; both yield a UTC-aware timestamp.
new_video_metadata["extractionTime"] = pd.Timestamp.now(tz="UTC")
if "id" in new_video_metadata.columns:
    # Keep ids as strings on both sides of the merge (the stored ids are cast
    # to str below as well), so old and new rows compare equal.
    new_video_metadata["id"] = new_video_metadata["id"].astype("str")

# Define the file path
video_metadata_path = f"results/{PROJECT}/video_metadata.csv"

if os.path.exists(video_metadata_path):
    # Load the existing file
    old_video_metadata = pd.read_csv(video_metadata_path)
    old_video_metadata["id"] = old_video_metadata["id"].astype("str")

    # Append new data. ignore_index=True gives every row a unique label;
    # without it the old and new frames both start at 0 and the labels collide.
    updated_video_metadata = pd.concat([old_video_metadata, new_video_metadata], ignore_index=True)

else:
    # First run: only the new data exists. Copy so that the de-duplication
    # below does not silently alter new_video_metadata as well.
    updated_video_metadata = new_video_metadata.copy()

# Remove duplicates, keeping the latest entry.
# Columns holding dicts/lists are unhashable and cannot take part in the
# comparison; extractionTime differs on every run by construction.
dict_columns = [
    col for col in updated_video_metadata.columns
    if updated_video_metadata[col].apply(lambda x: isinstance(x, (dict, list))).any()
]
dedup_columns = [
    col for col in updated_video_metadata.columns
    if col != "extractionTime" and col not in dict_columns
]
if dedup_columns:  # an empty subset is meaningless (e.g. the Actor returned no items)
    updated_video_metadata = updated_video_metadata.drop_duplicates(subset=dedup_columns, keep='last')
# Contiguous 0..n-1 index so later positional/index-based joins line up.
updated_video_metadata = updated_video_metadata.reset_index(drop=True)

# Save the combined data
updated_video_metadata.to_csv(video_metadata_path, index=False)

# Display the head of the DataFrame
print(updated_video_metadata.shape)
updated_video_metadata.head()

(390, 24)


Unnamed: 0,id,text,createTime,createTimeISO,isAd,isMuted,authorMeta,musicMeta,webVideoUrl,mediaUrls,videoMeta,diggCount,shareCount,playCount,collectCount,commentCount,mentions,detailedMentions,hashtags,effectStickers,isSlideshow,isPinned,searchQuery,extractionTime
0,7454252393333820718,Yahoo Finance's 2025 stock market outlook has ...,1735738500,2025-01-01T13:35:00.000Z,False,False,"{'id': '6904736114286314502', 'name': 'yahoofi...",{'musicName': 'original sound - Yahoo Finance'...,https://www.tiktok.com/@yahoofinance/video/745...,[],"{'height': 1024, 'width': 576, 'duration': 50,...",652,23,39900,102,17,[],[],"[{'id': '1663811318798342', 'name': 'yahoofina...",[],False,False,Stocks,2025-01-03 13:14:54.512187+00:00
1,7452063654142430510,Dan Ives explains why Palantir 'could be the n...,1735484700,2024-12-29T15:05:00.000Z,False,False,"{'id': '6904736114286314502', 'name': 'yahoofi...",{'musicName': 'original sound - Yahoo Finance'...,https://www.tiktok.com/@yahoofinance/video/745...,[],"{'height': 1024, 'width': 576, 'duration': 40,...",712,129,56500,112,44,[@briansozzi],"[{'id': '6955921024605537285', 'name': 'brians...","[{'id': '', 'name': ''}, {'id': '1663811318798...",[],False,False,Stocks,2025-01-03 13:14:54.512187+00:00
2,7451682964452150533,2 stocks that could explode this week 📈🚀 #stoc...,1734980146,2024-12-23T18:55:46.000Z,False,False,"{'id': '7265726713907233797', 'name': 'crowntr...","{'musicName': 'original sound', 'musicAuthor':...",https://www.tiktok.com/@crowntradingllc/video/...,[],"{'height': 1024, 'width': 576, 'duration': 73,...",841,229,41000,337,91,[],[],"[{'id': '3667054', 'name': 'stocks', 'title': ...",[],False,False,Stocks,2025-01-03 13:14:54.512187+00:00
3,7455341708537040170,"AI monetization takes time, according to Calli...",1735835700,2025-01-02T16:35:00.000Z,False,False,"{'id': '6904736114286314502', 'name': 'yahoofi...",{'musicName': 'original sound - Yahoo Finance'...,https://www.tiktok.com/@yahoofinance/video/745...,[],"{'height': 1024, 'width': 576, 'duration': 52,...",61,9,3274,4,4,[],[],"[{'id': '1663811318798342', 'name': 'yahoofina...",[],False,False,Stocks,2025-01-03 13:14:54.512187+00:00
4,7455379751885278510,"Stock market update January 2, 2025. #stockmar...",1735840882,2025-01-02T18:01:22.000Z,False,False,"{'id': '6936787321048335365', 'name': 'officia...","{'musicName': 'original sound', 'musicAuthor':...",https://www.tiktok.com/@officialjohnduncan/vid...,[],"{'height': 1024, 'width': 576, 'duration': 59,...",264,16,45700,27,81,[],[],"[{'id': '10281486', 'name': 'stockmarket', 'ti...",[],False,False,Stocks,2025-01-03 13:14:54.512187+00:00


# Extract/Update Profile Information

In [6]:
# Extract the authorMeta field together with the extraction time.
# The index is reset first: json_normalize numbers its rows independently of
# the source frame, and the join below matches on index labels. With the
# original (possibly duplicated or gapped) labels the timestamps would be
# attached to the wrong rows, be NaN, or multiply rows.
author_rows = updated_video_metadata[["authorMeta", "extractionTime"]].reset_index(drop=True)

# Convert the authorMeta dictionary to separate columns
new_profile_metadata = (
    pd.json_normalize(author_rows["authorMeta"].tolist())
    .join(author_rows["extractionTime"])
    .rename(columns={"name": "profile"})
)
# NOTE(review): rows whose authorMeta is not a dict (presumably rows reloaded
# from the CSV, where it is stored as text) seem to normalise to all-NaN and
# therefore get the id "nan"; they are removed further down - confirm.
new_profile_metadata["id"] = new_profile_metadata["id"].astype("str")

# Check if the file exists
profile_metadata_path = f"results/{PROJECT}/profile_metadata.csv"
if os.path.exists(profile_metadata_path):
    # Load the existing file
    prev_profile_metadata = pd.read_csv(profile_metadata_path)
    prev_profile_metadata["id"] = prev_profile_metadata["id"].astype("str")

    # Append new data with fresh, unique row labels
    updated_profile_metadata = pd.concat([prev_profile_metadata, new_profile_metadata], ignore_index=True)

else:
    # First run: only the new data exists
    updated_profile_metadata = new_profile_metadata

# Remove duplicates, keeping the latest entry.
# Columns holding dicts/lists are unhashable and are left out of the
# comparison, as is extractionTime, which differs on every run.
# NOTE(review): the sample output still shows one row per video with visually
# identical profile rows, so some compared column presumably differs between
# items (the avatar URLs are a candidate). If one row per profile is wanted,
# consider de-duplicating on "id" only - confirm the intent.
dict_columns = [
    col for col in updated_profile_metadata.columns
    if updated_profile_metadata[col].apply(lambda x: isinstance(x, (dict, list))).any()
]
dedup_columns = [
    col for col in updated_profile_metadata.columns
    if col != "extractionTime" and col not in dict_columns
]
updated_profile_metadata = updated_profile_metadata.drop_duplicates(subset=dedup_columns, keep='last')

# Drop invalid profiles (missing ids became the string 'nan' in the cast above)
updated_profile_metadata = updated_profile_metadata[updated_profile_metadata["id"]!='nan'].reset_index(drop=True)

# Save profile metadata locally
updated_profile_metadata.to_csv(profile_metadata_path, index=False)

# Display the head of the DataFrame
print(updated_profile_metadata.shape)
updated_profile_metadata.head()

(390, 16)


Unnamed: 0,id,profile,profileUrl,nickName,verified,signature,bioLink,originalAvatarUrl,avatar,privateAccount,following,fans,heart,video,digg,extractionTime
0,6904736114286314502,yahoofinance,https://www.tiktok.com/@yahoofinance,Yahoo Finance,True,Helping people build wealth with the world’s b...,,https://p19-pu-sign-useast8.tiktokcdn-us.com/t...,https://p19-pu-sign-useast8.tiktokcdn-us.com/t...,False,47,545200,10300000,2748,1291,2025-01-03 13:14:54.512187+00:00
1,6904736114286314502,yahoofinance,https://www.tiktok.com/@yahoofinance,Yahoo Finance,True,Helping people build wealth with the world’s b...,,https://p19-pu-sign-useast8.tiktokcdn-us.com/t...,https://p19-pu-sign-useast8.tiktokcdn-us.com/t...,False,47,545200,10300000,2748,1291,2025-01-03 13:14:54.512187+00:00
2,7265726713907233797,crowntradingllc,https://www.tiktok.com/@crowntradingllc,crowntrading,False,"Investing, Trading and Personal Finance 📈\nSto...",,https://p16-sign-va.tiktokcdn.com/tos-maliva-a...,https://p16-sign-va.tiktokcdn.com/tos-maliva-a...,False,27,2335,9435,193,184,2025-01-03 13:14:54.512187+00:00
3,6904736114286314502,yahoofinance,https://www.tiktok.com/@yahoofinance,Yahoo Finance,True,Helping people build wealth with the world’s b...,,https://p19-pu-sign-useast8.tiktokcdn-us.com/t...,https://p19-pu-sign-useast8.tiktokcdn-us.com/t...,False,47,545200,10300000,2748,1291,2025-01-03 13:14:54.512187+00:00
4,6936787321048335365,officialjohnduncan,https://www.tiktok.com/@officialjohnduncan,John Duncan│High End Finance,False,"Millionaire Tax and Investing tips, Author, Ad...",,https://p16-sign-va.tiktokcdn.com/tos-maliva-a...,https://p16-sign-va.tiktokcdn.com/tos-maliva-a...,False,5190,35600,159700,551,17900,2025-01-03 13:14:54.512187+00:00
