In [1]:
import os

from dotenv import load_dotenv
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this must run before `settings` is imported in the
# next cell so the ARCADE_* values are visible to it — confirm.
load_dotenv()

True

In [2]:
from langchain_core.documents import Document
from loguru import logger
from mentoragents.models.mentor_extract import MentorExtract
from arcadepy import Arcade
import os
from mentoragents.core.config import settings

def extract_twitter_tweets(mentor_extract : MentorExtract, max_tweets : int = 100) -> list[Document]:
    """Collect a mentor's tweets via Arcade and wrap each one in a Document.

    Args:
        mentor_extract : MentorExtract object; its id, name and twitter_handle are read.
        max_tweets : Value forwarded to get_all_tweets as its max_tweets argument.

    Returns:
        list[Document] : One Document per fetched tweet, with the tweet text as
            page_content and the mentor id/name, source and tweet URL in metadata.
    """
    logger.info(f"Extracting tweets from Twitter for {mentor_extract.name}")

    # Credentials come from the project settings object.
    api_key = settings.ARCADE_API_KEY
    arcade_user_id = settings.ARCADE_USER_ID
    arcade_client = Arcade(api_key=api_key)

    fetched = get_all_tweets(
        arcade_client,
        mentor_extract.twitter_handle,
        arcade_user_id,
        "X.SearchRecentTweetsByUsername",
        max_tweets,
    )

    # Every fetched item is indexed by "text" and "tweet_url"; a missing key raises KeyError.
    documents = [
        Document(
            page_content = item["text"],
            metadata = {
                "mentor_id" : mentor_extract.id,
                "mentor_name" : mentor_extract.name,
                "source" : "twitter",
                "source_url" : item["tweet_url"],
            },
        )
        for item in fetched
    ]

    logger.info(f"Extracted {len(documents)} tweets from Twitter for {mentor_extract.name}")
    return documents


def get_all_tweets(client, username: str, user_id: str, tool_name: str = "X.SearchRecentTweetsByUsername", max_tweets: int = 100) -> list:
    """
    Fetch up to ``max_tweets`` tweets for a given username, following pagination.

    Args:
        client: Arcade client instance (only ``client.tools.execute`` is used)
        username: Twitter username to fetch tweets for
        user_id: Arcade user ID
        tool_name: Name of the Arcade tool to use
        max_tweets: Upper bound on the number of tweets returned

    Returns:
        list: Collected tweets in the order the tool returned them, at most
        ``max_tweets`` long. Empty when the tool reports no tweets or when
        ``max_tweets`` is not positive.
    """
    if max_tweets <= 0:
        return []

    # Per-request page size. 100 is the value the request builder already used;
    # NOTE(review): assumed to be the tool's per-request ceiling — confirm.
    page_size = min(max_tweets, 100)

    all_tweets = []
    next_token = None

    while len(all_tweets) < max_tweets:
        # Build the tool input; the pagination token must actually be sent,
        # otherwise every iteration re-fetches the first page.
        inputs = {"username": username, "max_results": page_size}
        if next_token:
            inputs["next_token"] = next_token

        response = client.tools.execute(
            tool_name=tool_name,
            input=inputs,
            user_id=user_id,
        )

        # Tolerate responses without "data"/"meta" (e.g. no matching tweets).
        payload = response.output.value or {}
        new_tweets = payload.get("data") or []
        all_tweets.extend(new_tweets)

        next_token = (payload.get("meta") or {}).get("next_token")

        # Stop at the last page; also stop on an empty page so a stray token
        # cannot keep the loop spinning without progress.
        if not next_token or not new_tweets:
            break

    # The final page may overshoot the requested cap.
    return all_tweets[:max_tweets]

In [3]:
# Sample mentor used to exercise extract_twitter_tweets.
# The extractor only reads `id`, `name` and `twitter_handle`; the remaining
# fields hold placeholder values (example.com URLs, generic text).
# NOTE(review): presumably they are supplied only to satisfy the MentorExtract
# model — confirm which fields are actually required.
mentor_extract = MentorExtract(
    id = "1",
    name = "Naval Ravikant",
    twitter_handle = "naval",
    expertise = "Entrepreneurship, Investing, and Technology",
    perspective = "Perspective on the future of technology and entrepreneurship",
    style = "Style of the mentor",
    image_url = "https://example.com/image.jpg",
    pdfs = ["https://example.com/pdf1.pdf", "https://example.com/pdf2.pdf"],
    url = "https://example.com",
    youtube_videos = ["https://example.com/video1", "https://example.com/video2"],
    websites = ["https://example.com", "https://example.com/2"]
)

In [4]:
# Run the extractor for the sample mentor with the default max_tweets (100).
# This uses the Arcade credentials from `settings` and executes the Arcade tool,
# and the returned list of Documents is displayed as the cell output.
extract_twitter_tweets(mentor_extract)

[32m2025-07-02 16:44:20.148[0m | [1mINFO    [0m | [36m__main__[0m:[36mextract_twitter_tweets[0m:[36m17[0m - [1mExtracting tweets from Twitter for Naval Ravikant[0m
[32m2025-07-02 16:44:21.809[0m | [1mINFO    [0m | [36m__main__[0m:[36mextract_twitter_tweets[0m:[36m38[0m - [1mExtracted 34 tweets from Twitter for Naval Ravikant[0m


[Document(metadata={'mentor_id': '1', 'mentor_name': 'Naval Ravikant', 'source': 'twitter', 'source_url': 'https://x.com/x/status/1940155841484923296'}, page_content='@BAYC5511 Communists in diapers.'),
 Document(metadata={'mentor_id': '1', 'mentor_name': 'Naval Ravikant', 'source': 'twitter', 'source_url': 'https://x.com/x/status/1940155276914827611'}, page_content='@jbraunstein914 Blocked for tone policing.'),
 Document(metadata={'mentor_id': '1', 'mentor_name': 'Naval Ravikant', 'source': 'twitter', 'source_url': 'https://x.com/x/status/1940155049050837336'}, page_content='RT @naval: It’s nationalists vs communists from here on out.'),
 Document(metadata={'mentor_id': '1', 'mentor_name': 'Naval Ravikant', 'source': 'twitter', 'source_url': 'https://x.com/x/status/1940007631495864431'}, page_content='@daan_jan Usually it’s a one-way door until some generation shoots their way out.'),
 Document(metadata={'mentor_id': '1', 'mentor_name': 'Naval Ravikant', 'source': 'twitter', 'source_u