In [2]:
!pip install tweepy
from google.colab import userdata



In [5]:
import tweepy
import time

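Get your API key, API secret, and bearer token from the X Developer Portal (the free tier is sufficient): https://developer.x.com/. This notebook only uses the bearer token, which it reads from a Colab secret named `XBEARER`.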

In [3]:

# X (Twitter) API credentials.
# The bearer token is looked up in Colab's secret store under the key
# "XBEARER", so it never has to be written into the notebook itself.
bearer_token = userdata.get("XBEARER")


# Authenticate with the Twitter API using Bearer Token


In [6]:
# Create the tweepy client; the bearer token is the only credential supplied.
client = tweepy.Client(
    bearer_token=bearer_token,
)


# Define search parameters

In [7]:
# Search expression: English-language tweets matching "DeepSeek AI",
# with retweets excluded.
query = "DeepSeek AI lang:en -is:retweet"

# Number of tweets to ask for in a single request (100 is the API limit).
max_results = 100


# Function to handle API rate limits and fetch tweets


In [8]:
def fetch_tweets_with_retry(query, max_results, retries=3, wait_time=15 * 60):
    """
    Fetch recent tweets from the X API, backing off when rate limited.

    Uses the module-level ``client`` to call ``search_recent_tweets``. When
    the call raises ``tweepy.errors.TooManyRequests`` the function sleeps for
    ``wait_time`` seconds and tries again, up to ``retries`` attempts in
    total. No sleep happens after the final failed attempt, because there is
    no further request to wait for.

    Args:
        query (str): The search query string.
        max_results (int): Maximum number of tweets per request.
        retries (int): Total number of attempts before giving up.
        wait_time (int | float): Seconds to sleep between attempts after a
            rate-limit error. Defaults to 15 minutes.
    Returns:
        list: The fetched tweets, or an empty list when the search returned
        no data or every attempt was rate limited.
    """
    for attempt in range(1, retries + 1):
        try:
            # Fetch recent tweets
            response = client.search_recent_tweets(
                query=query,
                max_results=max_results,
                tweet_fields=["created_at", "text", "author_id", "public_metrics", "entities"]
            )
        except tweepy.errors.TooManyRequests:
            if attempt >= retries:
                # Out of attempts: sleeping now would only delay the failure.
                break
            print(f"Rate limit reached. Retrying in {wait_time / 60:g} minutes...")
            time.sleep(wait_time)
        else:
            # response.data is None when nothing matched; normalise to a list.
            return response.data or []
    print("Max retries reached. Exiting.")
    return []

# Fetch tweets

In [9]:

# Announce the search, then run it through the rate-limit-aware helper.
print(f"Fetching tweets about '{query}'...\n")
tweets = fetch_tweets_with_retry(query=query, max_results=max_results)


Fetching tweets about 'DeepSeek AI lang:en -is:retweet'...



In [10]:

if not tweets:
    print("No tweets found or rate limit exceeded.")
else:
    # Rank the fetched tweets by retweet count, most retweeted first.
    sorted_tweets = sorted(
        tweets,
        key=lambda t: t.public_metrics["retweet_count"],
        reverse=True,
    )

    # Keep at most the first 15 of the ranked list.
    top_tweets = sorted_tweets[:15]

    print(f"Top 15 Tweets about '{query}' by Retweets:\n")
    for i, tweet in enumerate(top_tweets, start=1):
        # One print per tweet; sep="\n" puts each field on its own line.
        print(
            f"{i}. Tweet ID: {tweet.id}",
            f"   Author ID: {tweet.author_id}",
            f"   Created At: {tweet.created_at}",
            f"   Retweets: {tweet.public_metrics['retweet_count']}",
            f"   Text: {tweet.text}",
            f"   Mentions: {tweet.entities.get('mentions', []) if tweet.entities else 'None'}",
            "-" * 80,
            sep="\n",
        )


Top 15 Tweets about 'DeepSeek AI lang:en -is:retweet' by Retweets:

1. Tweet ID: 1884371370773209108
   Author ID: 1814726379805814787
   Created At: 2025-01-28 22:41:59+00:00
   Retweets: 1
   Text: @BillAckman @deepseek_ai @nvidia I went to China. They have humans inside their ATM cash machines. It could be like that with DeepSeek, human PHD's answering every request, lol.
   Mentions: [{'start': 0, 'end': 11, 'username': 'BillAckman', 'id': '880412538625810432'}, {'start': 12, 'end': 24, 'username': 'deepseek_ai', 'id': '1714580962569588736'}, {'start': 25, 'end': 32, 'username': 'nvidia', 'id': '61559439'}]
--------------------------------------------------------------------------------
2. Tweet ID: 1884371202334212243
   Author ID: 2962828127
   Created At: 2025-01-28 22:41:19+00:00
   Retweets: 1
   Text: @iamjasonlevin supposedly research on this already and the answer is that yes, yes you can. 

With deepseek R1 this accuracy is probably closer to 90% versus the 70% a year'ish 

 Save Results to CSV

In [13]:
import pandas as pd

# Write the top-15 selection to a CSV file, but only when the fetch returned data
if tweets:
    # One flat record per tweet; the key order fixes the CSV column order.
    tweet_data = []
    for tweet in top_tweets:
        if tweet.entities:
            mentions = tweet.entities.get("mentions", [])
        else:
            # Tweets without an entities payload get the literal string "None".
            mentions = "None"
        tweet_data.append(
            {
                "Tweet ID": tweet.id,
                "Author ID": tweet.author_id,
                "Created At": tweet.created_at,
                "Text": tweet.text,
                "Retweets": tweet.public_metrics["retweet_count"],
                "Mentions": mentions,
            }
        )

    # Tabulate the records
    df = pd.DataFrame(tweet_data)

    # Write the table out without the DataFrame index column
    output_file = "top_15_deepseek_ai_tweets.csv"
    df.to_csv(output_file, index=False)
    print(f"Data saved to {output_file}")


Data saved to top_15_deepseek_ai_tweets.csv
