# In [20]:
import requests
import csv
import time
import os

# Query-string parameters sent with every search request. The pagination
# loop adds/removes a "cursor" key on this same dict between pages.
params = {
    "type": "Latest",
    "count": "100",
    "query": "fire AND Malaysia",
}

# Endpoint and authentication headers for the RapidAPI-hosted search API.
url = "https://twitter241.p.rapidapi.com/search-v2"
headers = {
    # The key is taken from the RAPIDAPI_KEY environment variable when it is
    # set to a non-empty value.
    # NOTE(review): the fallback literal is a credential committed to source;
    # it is kept only so existing runs keep working. Rotate it and drop the
    # fallback once RAPIDAPI_KEY is configured everywhere this runs.
    "X-RapidAPI-Key": os.environ.get("RAPIDAPI_KEY")
    or "c7d63f413cmshf2851df4a5e42d1p1ed1bejsn092d86a02e69",
    "X-RapidAPI-Host": "twitter241.p.rapidapi.com",
}

# Output file and its column layout (one column per key of each tweet row).
csv_file = 'tweets3_data.csv'
fieldnames = [
    'tweet_id',
    'tweet_text',
    'tweet_created_at',
    'tweet_hashtags',
    'location',
    'user_id',
    'user_name',
    'user_screen_name',
    'verified',
    'verification_type',
    'user_followers_count',
    'professional_type',
    'tweet_images',
]

# Only a missing file gets created (with the header row as its first line);
# a file that is already on disk is left exactly as it is.
if os.path.exists(csv_file):
    pass
else:
    with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()

# Upper bound (seconds) on how long a single HTTP request may block; without
# it a stalled connection would hang the loop indefinitely.
REQUEST_TIMEOUT = 30


def load_seen_ids(path):
    """Return the tweet ids already stored in the CSV file at *path*.

    The CSV is opened in append mode by the scraping loop, so ids written by
    earlier runs must be known up front or they would be appended again.
    A missing file, or one without a ``tweet_id`` column, yields an empty set.
    """
    if not os.path.exists(path):
        return set()
    with open(path, newline='', encoding='utf-8') as existing:
        return {
            row['tweet_id']
            for row in csv.DictReader(existing)
            if row.get('tweet_id')
        }


def tweet_row(tweet):
    """Flatten one tweet result object into a dict keyed by the CSV columns.

    Missing or null sub-objects fall back to the same placeholders the CSV
    has always used: '' for text fields, 'null' for absent hashtags, images,
    verification type and professional type, False/0 for verified/followers.
    """
    legacy = tweet.get('legacy') or {}
    hashtags = (legacy.get('entities') or {}).get('hashtags') or []
    user = ((tweet.get('core') or {}).get('user_results') or {}).get('result') or {}
    user_legacy = user.get('legacy') or {}

    # Only media entries of type 'photo' contribute to the image column.
    media = (legacy.get('extended_entities') or {}).get('media') or []
    images = [m.get('media_url_https', '') for m in media if m.get('type') == 'photo']

    return {
        'tweet_id': tweet.get('rest_id'),
        'tweet_text': legacy.get('full_text', ''),
        'tweet_created_at': legacy.get('created_at', ''),
        'tweet_hashtags': ','.join(tag.get('text', '') for tag in hashtags) if hashtags else 'null',
        'location': user_legacy.get('location', ''),
        'user_id': user.get('rest_id', ''),
        'user_name': user_legacy.get('name', ''),
        'user_screen_name': user_legacy.get('screen_name', ''),
        'verified': user.get('verified', False),
        'verification_type': user.get('verification_type', 'null'),
        'user_followers_count': user_legacy.get('followers_count', 0),
        'professional_type': (user.get('professional') or {}).get('professional_type', 'null'),
        'tweet_images': ','.join(images) if images else 'null',
    }


def parse_page(data, seen_ids):
    """Extract new tweet rows and the bottom cursor from one decoded response.

    Args:
        data: The decoded JSON body of a search response.
        seen_ids: Set of tweet ids (as strings) already recorded; ids of the
            rows returned here are added to it in place.

    Returns:
        A ``(rows, next_cursor)`` tuple. ``rows`` holds one dict per tweet
        not yet in ``seen_ids``; ``next_cursor`` is the value of the last
        entry whose id starts with 'cursor-bottom', or None when there is none.
    """
    rows = []
    next_cursor = None
    if not isinstance(data, dict):
        return rows, next_cursor

    timeline = (data.get('result') or {}).get('timeline') or {}
    for instruction in timeline.get('instructions') or []:
        for item in instruction.get('entries') or []:
            content = item.get('content') or {}
            item_content = content.get('itemContent') or {}

            if item_content.get('itemType') == "TimelineTweet":
                # An entry can lack 'tweet_results' or its 'result'; skip it
                # instead of failing the whole page with a KeyError.
                tweet = (item_content.get('tweet_results') or {}).get('result') or {}
                tweet_id = tweet.get('rest_id')
                # Ids are compared as strings so they match what is read
                # back from the CSV by load_seen_ids.
                if tweet_id and str(tweet_id) not in seen_ids:
                    seen_ids.add(str(tweet_id))
                    rows.append(tweet_row(tweet))

            if (item.get('entryId') or '').startswith('cursor-bottom'):
                next_cursor = content.get('value')

    return rows, next_cursor


# In a notebook cell or when run as a script __name__ is "__main__", so the
# loop runs exactly where it used to; the guard only keeps the helpers above
# importable without triggering network traffic.
if __name__ == "__main__":
    seen_ids = load_seen_ids(csv_file)  # To avoid duplicates, including across runs
    next_cursor = None

    while True:
        if next_cursor:
            params["cursor"] = next_cursor
        else:
            params.pop("cursor", None)  # First request carries no cursor

        try:
            response = requests.get(url, headers=headers, params=params,
                                    timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # Pages fetched so far are already on disk, so stopping is safe.
            print("Request failed:", exc)
            break

        if response.status_code != 200:
            print("Request failed with status code:", response.status_code)
            break

        try:
            data = response.json()
        except ValueError:
            print("Response body is not valid JSON. Stopping.")
            break

        previous_cursor = next_cursor
        tweets, next_cursor = parse_page(data, seen_ids)

        # Save tweets page by page so an interruption loses at most one page
        if tweets:
            with open(csv_file, mode='a', newline='', encoding='utf-8') as file:
                writer = csv.DictWriter(file, fieldnames=fieldnames)
                writer.writerows(tweets)
            print(f"{len(tweets)} new tweets saved to CSV.")
        else:
            print("No new tweets found.")

        # If no more cursor, stop
        if not next_cursor:
            print("No more cursor. Scraping complete.")
            break

        # Re-sending the cursor we just used would fetch the same page forever
        if next_cursor == previous_cursor:
            print("Cursor did not advance. Stopping.")
            break

        # Pause between requests to go easy on the API
        time.sleep(2)


# Output:
# 20 new tweets saved to CSV.
# 18 new tweets saved to CSV.
# No more cursor. Scraping complete.
