In [None]:
!pip install ntscraper pandas

In [None]:
import random
import time
from typing import Iterable, List

import pandas as pd
from ntscraper import Nitter


class TwitterBatchScraper:
    """Bulk scraper for public tweets via Nitter instances.

    Typical use: construct with a list of usernames, call ``scrape_all()`` to
    collect the tweets into a single DataFrame (also stored on
    ``self.dataframe``), then optionally ``save_to_csv()``.
    """

    # Column order of the aggregated DataFrame; also used to build the empty
    # result when nothing could be scraped.
    COLUMNS = (
        "source_user",
        "username",
        "text",
        "date",
        "likes",
        "retweets",
        "replies",
        "quotes",
    )

    def __init__(
        self,
        target_accounts: Iterable[str],
        tweets_per_user: int = 20,
        *,
        delay_range: "tuple[float, float]" = (2.0, 5.0),
    ):
        """Validate the configuration and create the Nitter client.

        Args:
            target_accounts: Iterable of usernames. Entries are converted to
                ``str`` and stripped; blank entries are dropped.
            tweets_per_user: Maximum number of tweets requested per account.
                Must convert to an integer >= 1.
            delay_range: ``(min, max)`` bounds, in seconds, of the random pause
                inserted between two consecutive accounts.

        Raises:
            TypeError: If ``target_accounts`` is not an iterable of usernames
                (a bare string is rejected as well).
            ValueError: If no usable username remains, if ``tweets_per_user``
                is not a positive integer, or if ``delay_range`` is invalid.
        """
        # A bare string is technically iterable, but iterating it would yield
        # one "account" per character, which is never what the caller meant.
        if isinstance(target_accounts, (str, bytes)) or not isinstance(target_accounts, Iterable):
            raise TypeError("target_accounts must be an iterable of usernames")
        cleaned = (str(user).strip() for user in target_accounts)
        self.target_accounts: List[str] = [name for name in cleaned if name]
        if not self.target_accounts:
            raise ValueError("At least one target account must be provided")

        # Convert first, validate second: otherwise a value such as 0.5 passes
        # a "> 0" check and is then truncated to a useless limit of 0.
        try:
            limit = int(tweets_per_user)
        except (TypeError, ValueError, OverflowError) as exc:
            raise ValueError("tweets_per_user must be a positive integer") from exc
        if limit <= 0:
            raise ValueError("tweets_per_user must be a positive integer")
        self.tweets_per_user = limit

        try:
            low, high = (float(bound) for bound in delay_range)
        except (TypeError, ValueError) as exc:
            raise ValueError("delay_range must be a (min, max) pair of seconds") from exc
        if low < 0 or high < low:
            raise ValueError("delay_range must satisfy 0 <= min <= max")
        self.delay_range = (low, high)

        self.scraper = Nitter()
        self.dataframe: pd.DataFrame | None = None

    @staticmethod
    def _as_dict(value) -> dict:
        """Return *value* when it is a dict, otherwise an empty dict."""
        return value if isinstance(value, dict) else {}

    def _scrape_user(self, username: str) -> pd.DataFrame:
        """Scrape tweets for a single user, returning a DataFrame.

        Any exception raised by the scraper is reported on stdout and turned
        into an empty DataFrame so that one failing account does not abort the
        whole batch. Missing or malformed fields become ``None`` instead of
        raising.
        """
        try:
            results = self.scraper.get_tweets(username, mode="user", number=self.tweets_per_user)
        except Exception as exc:  # noqa: BLE001 - deliberate best-effort boundary
            print(f"[ERROR] Failed to scrape {username}: {exc}")
            return pd.DataFrame()

        # Tolerate a None / non-dict response and a missing or None "tweets" entry.
        raw_tweets = self._as_dict(results).get("tweets") or []

        rows = []
        for raw in raw_tweets:
            tweet = self._as_dict(raw)
            # "user" / "stats" may be absent or None; normalise before reading.
            author = self._as_dict(tweet.get("user"))
            stats = self._as_dict(tweet.get("stats"))
            rows.append(
                {
                    "source_user": username,
                    "username": author.get("username"),
                    "text": tweet.get("text"),
                    "date": tweet.get("date"),
                    "likes": stats.get("likes"),
                    "retweets": stats.get("retweets"),
                    "replies": stats.get("replies"),
                    "quotes": stats.get("quotes"),
                }
            )

        return pd.DataFrame(rows)

    def scrape_all(self) -> pd.DataFrame:
        """Scrape tweets for all target accounts and aggregate into a DataFrame.

        Returns:
            The concatenation of every non-empty per-user result (also stored
            in ``self.dataframe``). When nothing was collected, an empty
            DataFrame with the ``COLUMNS`` columns is returned instead.
        """
        all_frames: list[pd.DataFrame] = []
        total = len(self.target_accounts)

        for idx, user in enumerate(self.target_accounts, start=1):
            print(f"[{idx}/{total}] Scraping @{user} (limit={self.tweets_per_user})")
            df_user = self._scrape_user(user)
            if not df_user.empty:
                all_frames.append(df_user)
                print(f"Collected {len(df_user)} tweets from @{user}")
            else:
                print(f"No tweets collected for @{user}")

            # Rate limiting between users; pausing after the last account
            # would only delay the result, so it is skipped.
            if idx < total:
                time.sleep(random.uniform(*self.delay_range))

        if all_frames:
            self.dataframe = pd.concat(all_frames, ignore_index=True)
        else:
            self.dataframe = pd.DataFrame(columns=list(self.COLUMNS))

        return self.dataframe

    def save_to_csv(self, filename: str = "batch_scrape_results.csv") -> None:
        """Persist the aggregated DataFrame to disk as CSV (without the index).

        Raises:
            ValueError: If ``scrape_all()`` has not produced a DataFrame yet.
        """
        if self.dataframe is None:
            raise ValueError("No data to save. Run scrape_all() first.")
        self.dataframe.to_csv(filename, index=False)
        print(f"Saved results to {filename}")


In [None]:
# Example run: list the accounts to scrape, cap the number of tweets fetched
# per account, collect everything into one DataFrame and preview its first rows.
sample_accounts = [
    "SpaceX",
    "Tesla",
]
scraper = TwitterBatchScraper(
    target_accounts=sample_accounts,
    tweets_per_user=5,
)
result_df = scraper.scrape_all()
result_df.head()
