# In [None]:
import tweepy
import pandas as pd
import os
from dotenv import load_dotenv

# --- 1. LOAD KEYS ---
# Pull variables from the .env file one directory up (resolved relative to the
# current working directory), then read the bearer token from the environment
# so the secret is never hard-coded in this file.
load_dotenv('../.env')
bearer_token = os.getenv('TWITTER_BEARER_TOKEN')

if not bearer_token:
    # Fail fast: without a token the later search request cannot authenticate,
    # so stop here with a clear message instead of sending a doomed request.
    raise RuntimeError(
        "ERROR: TWITTER_BEARER_TOKEN not found. "
        "Set it in ../.env or in the environment before running."
    )

print("Bearer Token loaded successfully.")

# --- 2. SET UP API CLIENT ---
# Bearer-token client; wait_on_rate_limit=True asks tweepy to wait when the
# rate limit is reached rather than raising.
client = tweepy.Client(bearer_token=bearer_token, wait_on_rate_limit=True)

# --- 3. DEFINE YOUR SEARCH ---
# Sale hashtags/phrases, with retweets excluded and results limited to English.
# The two adjacent literals are concatenated into a single query string.
query = (
    '(#BigBillionDays OR #GreatIndianFestival OR "Flipkart Sale" OR "Amazon Sale") '
    '-is:retweet lang:en'
)

# --- 4. FETCH TWEETS ---
# Each collected row is [id, created_at, text, retweet_count, like_count].
tweets_list = []
total_tweets_to_fetch = 100  # We can only get ~100 with the free tier

# The recent-search endpoint accepts a page size between 10 and 100. Derive it
# from the target instead of hard-coding 100, so lowering the target above no
# longer requests a full page of 100 tweets.
page_size = min(100, max(10, total_tweets_to_fetch))

try:
    print(f"Connecting to X API and fetching {total_tweets_to_fetch} tweets...")

    tweet_pages = tweepy.Paginator(
        client.search_recent_tweets,
        query=query,
        tweet_fields=['created_at', 'public_metrics'],
        max_results=page_size,
    )

    # flatten() yields individual tweets across pages and stops at `limit`.
    for tweet in tweet_pages.flatten(limit=total_tweets_to_fetch):
        metrics = tweet.public_metrics
        tweets_list.append([
            tweet.id,
            tweet.created_at,
            tweet.text,
            metrics['retweet_count'],
            metrics['like_count'],
        ])

    print(f"\nSuccessfully collected {len(tweets_list)} tweets.")

except Exception as e:
    # Deliberately broad: this is the script's top-level boundary. Whatever
    # was appended to tweets_list before the failure is kept for the save step.
    print(f"An error occurred: {e}")


# --- 5. SAVE TO A CLEAN TABLE (DATAFRAME) ---
# Turn the collected rows into a DataFrame and write it out as CSV; when
# nothing was collected, only report that and write no file.
if tweets_list:
    columns = ['ID', 'CreatedAt', 'Text', 'Retweets', 'Likes']
    df = pd.DataFrame(tweets_list, columns=columns)

    output_path = '../data/raw_tweets.csv'
    # to_csv does not create missing folders; make sure the target directory
    # exists so already-fetched tweets are not lost to an OSError.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    df.to_csv(output_path, index=False)

    print(f"Data saved to {output_path}")
    print("\nHere's a sample of your data:")
    print(df.head())

else:
    print("No tweets were collected. Nothing to save.")