In [1]:
!pip install twikit datetime

Collecting twikit
  Downloading twikit-1.7.5-py3-none-any.whl (110 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.4/110.4 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datetime
  Downloading DateTime-5.5-py3-none-any.whl (52 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.6/52.6 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting httpx (from twikit)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting filetype (from twikit)
  Downloading filetype-1.2.0-py2.py3-none-any.whl (19 kB)
Collecting pyotp (from twikit)
  Downloading pyotp-2.9.0-py3-none-any.whl (13 kB)
Collecting zope.interface (from datetime)
  Downloading zope.interface-6.4.post2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (247 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━

In [None]:
from twikit import Client
USERNAME = 'example123'
EMAIL = 'example@gmail.com'
PASSWORD = 'password123'
client = Client('en-US')
client.login(
    auth_info_1=USERNAME ,
    auth_info_2=EMAIL,
    password=PASSWORD
)

In [None]:
import csv
import os
from datetime import datetime, timezone
import time
from twikit import TooManyRequests
from IPython.display import clear_output

class TweetFetcher:
    def __init__(self, client, max_tweets=5000, csv_file_path='dataset.csv'):
        self.client = client
        self.count = 0
        self.max_tweets = max_tweets
        self.csv_file_path = csv_file_path
        self.initialize_csv_file()

    def initialize_csv_file(self):
        if not os.path.isfile(self.csv_file_path):
            with open(self.csv_file_path, mode='w', newline='', encoding='utf-8') as file:
                writer = csv.writer(file)
                writer.writerow([
                    'id', 'created_at', 'text', 'lang',
                    'in_reply_to', 'is_quote_status', 'quote', 'retweeted_tweet',
                    'possibly_sensitive', 'possibly_sensitive_editable', 'quote_count',
                    'media', 'reply_count', 'favorite_count', 'favorited', 'view_count',
                    'retweet_count', 'editable_until_msecs', 'is_translatable',
                    'is_edit_eligible', 'edits_remaining', 'state', 'replies',
                    'reply_to', 'related_tweets', 'hashtags', 'poll', 'has_card',
                    'thumbnail_title', 'thumbnail_url', 'urls', 'full_text',
                    'user_id', 'user_created_at', 'user_name', 'user_screen_name',
                    'user_profile_image_url', 'user_profile_banner_url', 'user_url',
                    'user_location', 'user_description', 'user_description_urls',
                    'user_urls', 'user_pinned_tweet_ids', 'user_blue_verified',
                    'user_verified', 'user_possibly_sensitive', 'user_can_dm',
                    'user_can_media_tag', 'user_want_retweets', 'user_default_profile',
                    'user_default_profile_image', 'user_has_custom_timelines',
                    'user_followers_count', 'user_fast_followers_count',
                    'user_normal_followers_count', 'user_following_count',
                    'user_favorites_count', 'user_listed_count', 'user_media_count',
                    'user_statuses_count', 'user_is_translator', 'user_translator_type',
                    'user_withheld_in_countries'
                ])

    def write_tweet_to_csv(self, tweet, writer):
        user = tweet.user
        writer.writerow([
            tweet.id,
            tweet.created_at,
            tweet.text,
            tweet.lang,
            tweet.in_reply_to,
            tweet.is_quote_status,
            tweet.quote,
            tweet.retweeted_tweet,
            tweet.possibly_sensitive,
            tweet.possibly_sensitive_editable,
            tweet.quote_count,
            tweet.media,
            tweet.reply_count,
            tweet.favorite_count,
            tweet.favorited,
            tweet.view_count,
            tweet.retweet_count,
            tweet.editable_until_msecs,
            tweet.is_translatable,
            tweet.is_edit_eligible,
            tweet.edits_remaining,
            tweet.state,
            tweet.replies,
            tweet.reply_to,
            tweet.related_tweets,
            tweet.hashtags,
            tweet.poll,
            tweet.has_card,
            tweet.thumbnail_title,
            tweet.thumbnail_url,
            tweet.urls,
            tweet.full_text,
            user.id,
            user.created_at,
            user.name,
            user.screen_name,
            user.profile_image_url,
            user.profile_banner_url,
            user.url,
            user.location,
            user.description,
            user.description_urls,
            user.urls,
            user.pinned_tweet_ids,
            user.is_blue_verified,
            user.verified,
            user.possibly_sensitive,
            user.can_dm,
            user.can_media_tag,
            user.want_retweets,
            user.default_profile,
            user.default_profile_image,
            user.has_custom_timelines,
            user.followers_count,
            user.fast_followers_count,
            user.normal_followers_count,
            user.following_count,
            user.favourites_count,
            user.listed_count,
            user.media_count,
            user.statuses_count,
            user.is_translator,
            user.translator_type,
            user.withheld_in_countries
        ])

    def fetch_tweets(self, tweets, writer):
        api_calls = 0
        call_limit = 200
        limit_reset_time = 15 * 60
        limit_between = 10

        while self.count < self.max_tweets:
            try:
                for tweet in tweets:
                    tweet_date = datetime.strptime(tweet.created_at, '%a %b %d %H:%M:%S %z %Y')
                    self.write_tweet_to_csv(tweet, writer)
                    self.count += 1
                    if self.count >= self.max_tweets:
                        return self.count
                    print(f"count: {self.count}")  # Moved the print statement here

                # self.countdown(limit_between)

                tweets = tweets.next()
            except TooManyRequests:
                print("Rate limit exceeded. Sleeping for 15 minutes.")
                self.countdown(limit_reset_time)
                api_calls = 0
            except StopIteration:
                break


    def countdown(self, seconds):
        while seconds > 0:
            mins, secs = divmod(seconds, 60)
            timer = f'{self.count} | waiting ' + '{:02d}:{:02d}'.format(mins, secs)
            clear_output(wait=True)
            print(timer)
            time.sleep(1)
            seconds -= 1

        # clear_output(wait=True)
        # print("Time's up!")

    def fetch_and_save_tweets(self, query, sorted='Latest'):
        try:
            tweets = self.client.search_tweet(query, sorted)
            with open(self.csv_file_path, mode='a', newline='', encoding='utf-8') as file:
                writer = csv.writer(file)
                self.fetch_tweets(tweets, writer)
                print(f"Total tweets fetched: {self.count}")
        except TooManyRequests:
            print("Rate limit exceeded. Sleeping for 15 minutes.")
            self.countdown(900)  # 15 minutes
            tweets = self.client.search_tweet(query, sorted)
            with open(self.csv_file_path, mode='a', newline='', encoding='utf-8') as file:
                writer = csv.writer(file)
                self.fetch_tweets(tweets, writer)
                print(f"Total tweets fetched: {self.count}")


tweet_fetcher = TweetFetcher(client,csv_file_path='dataset.csv')
query = '(#SaudiCup OR #TheSaudiCup_2024 OR #saudicup2024 #كأس_السعودية) since:2024-02-20 until:2024-03-05 lang:en'
tweet_fetcher.fetch_and_save_tweets(query)