In [1]:
import tweepy
import pandas as pd
from os import mkdir, listdir
from contextlib import suppress
from utils import get_twitter_api
from tweepy.errors import NotFound, Forbidden
import json

MAX_TWEETS = 2000
MAX_REQUESTS = 100

api = get_twitter_api()

In [2]:
def save_tweets(news_id, tweet_ids, path):
    n = len(tweet_ids)
    idx_list = list(range(0, n, MAX_REQUESTS)) + [n]

    for idx, i in enumerate(idx_list[:-1]):
        chunk = tweet_ids[i:idx_list[idx+1]]

        status_list = api.lookup_statuses(id=chunk)
        retweets_list = []
        
        # get retweets
        for tweet in status_list:
            if tweet.retweet_count:
                with suppress(NotFound, Forbidden):
                    retweets = api.get_retweets(tweet.id)
                    # add field to retrieve ID of original tweet
                    for x in retweets:
                        x._json['retweeted_from'] = tweet.id
                    
                    retweets_list.extend(retweets)

        if len(retweets_list):
            print(f"Found a total of {len(retweets_list)} retweets.")

            json_path = f"{path}/retweets/{news_id}"
            mkdir(json_path)

            for retweet in retweets_list:
                with open(f"{json_path}/{retweet.id_str}.json", "w") as f:
                    json.dump(retweet._json, f)
        
        continue

        json_path = f"{path}/tweets/{news_id}"
        mkdir(json_path)

        for tweet in status_list:
            with open(f"{json_path}/{tweet.id_str}.json", "w") as f:
                json.dump(tweet._json, f)

In [3]:
for dataset in ["politifact"]:
    for news_type in ["real", "fake"]:
        df = pd.read_csv(f"dataset/FakeNewsNet/{dataset}_{news_type}.csv")
        df = df[~df['tweet_ids'].isnull()]
        n = len(df)
        
        for i, (news_id, url, title, tweet_ids) in df.iterrows():
            if i % 100 == 0:
                print(f"Processed {i} of {n}")
            
            # convert tweet ids string to ints
            tweet_ids = list(map(int, tweet_ids.split('\t')))[:MAX_TWEETS]

            with suppress(FileExistsError):
                path = f"./dataset/FakeNewsNet/{dataset}/{news_type}"
                print(f"{news_id}: {len(tweet_ids)} tweets")
                save_tweets(news_id, tweet_ids, path)


Processed 0 of 409
politifact14984: 1174 tweets
Found a total of 100 retweets.
politifact12944: 51 tweets
Found a total of 26 retweets.
politifact779: 8 tweets
politifact14064: 30 tweets
Found a total of 26 retweets.
politifact14474: 96 tweets
Found a total of 57 retweets.
politifact1313: 1882 tweets
Found a total of 4 retweets.
politifact937: 1544 tweets
Found a total of 5 retweets.
politifact1519: 55 tweets
Found a total of 2 retweets.
politifact13068: 15 tweets
Found a total of 8 retweets.
politifact11747: 60 tweets


Rate limit reached. Sleeping for: 870


Found a total of 16 retweets.
politifact9691: 55 tweets
Found a total of 9 retweets.
politifact13420: 1933 tweets
Found a total of 29 retweets.
Found a total of 44 retweets.
politifact423: 2 tweets
Found a total of 1 retweets.
politifact13087: 31 tweets
Found a total of 58 retweets.
politifact13132: 30 tweets
Found a total of 28 retweets.
politifact1084: 238 tweets
Found a total of 75 retweets.


Rate limit reached. Sleeping for: 870


Found a total of 55 retweets.
politifact2166: 35 tweets
Found a total of 26 retweets.
politifact58: 1 tweets
politifact13303: 95 tweets
Found a total of 63 retweets.
politifact3892: 1 tweets
politifact6641: 37 tweets
Found a total of 1 retweets.
politifact513: 1020 tweets
Found a total of 79 retweets.
Found a total of 39 retweets.
politifact14070: 61 tweets
Found a total of 187 retweets.
politifact806: 13 tweets
Found a total of 3 retweets.
politifact9196: 7 tweets
Found a total of 1 retweets.
politifact12945: 58 tweets


Rate limit reached. Sleeping for: 868


Found a total of 107 retweets.
politifact14036: 2000 tweets
Found a total of 51 retweets.
Found a total of 54 retweets.
politifact537: 965 tweets
Found a total of 30 retweets.
Found a total of 11 retweets.
politifact118: 1638 tweets
Found a total of 68 retweets.


Rate limit reached. Sleeping for: 869


Found a total of 45 retweets.
politifact9576: 6 tweets
politifact356: 1854 tweets
Found a total of 7 retweets.
Found a total of 5 retweets.
politifact401: 1274 tweets
Found a total of 29 retweets.
Found a total of 71 retweets.
politifact384: 2000 tweets
Found a total of 9 retweets.
Found a total of 2 retweets.
politifact12148: 6 tweets
politifact12411: 1232 tweets
Found a total of 33 retweets.
Found a total of 21 retweets.
politifact13013: 40 tweets
Found a total of 4 retweets.
politifact11191: 989 tweets
Found a total of 9 retweets.
Found a total of 1 retweets.
politifact12079: 772 tweets


Rate limit reached. Sleeping for: 864


Found a total of 39 retweets.
Found a total of 108 retweets.
politifact11761: 22 tweets
Found a total of 21 retweets.
politifact51: 2 tweets
politifact10209: 853 tweets
Found a total of 20 retweets.
Found a total of 55 retweets.
politifact11899: 91 tweets
Found a total of 24 retweets.
politifact514: 47 tweets
Found a total of 14 retweets.
politifact385: 2 tweets
politifact2298: 4 tweets
Found a total of 1 retweets.
politifact1216: 2000 tweets


Rate limit reached. Sleeping for: 867


Found a total of 112 retweets.
Found a total of 147 retweets.
politifact11989: 495 tweets
Found a total of 80 retweets.
Found a total of 63 retweets.
politifact1307: 8 tweets


Rate limit reached. Sleeping for: 870


Found a total of 6 retweets.
politifact13833: 22 tweets
Found a total of 21 retweets.
politifact636: 19 tweets
politifact10408: 1385 tweets
Found a total of 33 retweets.
Found a total of 41 retweets.
politifact542: 1558 tweets
Found a total of 2 retweets.
Found a total of 26 retweets.
politifact620: 1564 tweets
Found a total of 5 retweets.
Found a total of 1 retweets.
politifact979: 285 tweets
Found a total of 16 retweets.
Found a total of 18 retweets.
politifact13310: 525 tweets


Rate limit reached. Sleeping for: 867


Found a total of 27 retweets.
Found a total of 36 retweets.
politifact11960: 47 tweets
Found a total of 18 retweets.
politifact12057: 185 tweets
Found a total of 30 retweets.
Found a total of 23 retweets.
politifact746: 1 tweets
Found a total of 1 retweets.
politifact683: 2000 tweets
Found a total of 186 retweets.


Rate limit reached. Sleeping for: 869


Found a total of 169 retweets.
politifact245: 121 tweets
Found a total of 5 retweets.
politifact14940: 165 tweets
Found a total of 48 retweets.
Found a total of 27 retweets.
politifact1177: 38 tweets
Found a total of 1 retweets.
politifact224: 1 tweets
politifact13058: 938 tweets
Found a total of 9 retweets.
Found a total of 12 retweets.
politifact4275: 1 tweets
politifact182: 1 tweets
Found a total of 2 retweets.
politifact13193: 682 tweets
Found a total of 45 retweets.


Rate limit reached. Sleeping for: 866


Found a total of 37 retweets.
politifact763: 2 tweets
Processed 100 of 409
politifact7563: 55 tweets
Found a total of 6 retweets.
politifact8310: 116 tweets
Found a total of 43 retweets.
Found a total of 2 retweets.
politifact11580: 751 tweets
Found a total of 47 retweets.
Found a total of 1 retweets.
politifact1213: 2000 tweets
Found a total of 38 retweets.
Found a total of 21 retweets.
politifact65: 23 tweets
Found a total of 23 retweets.
politifact11855: 4 tweets
politifact8737: 121 tweets
Found a total of 3 retweets.
Found a total of 3 retweets.
politifact1731: 2000 tweets
Found a total of 98 retweets.


Rate limit reached. Sleeping for: 855


Found a total of 34 retweets.
politifact756: 34 tweets
Found a total of 12 retweets.
politifact489: 16 tweets
politifact11709: 744 tweets
Found a total of 11 retweets.
Found a total of 42 retweets.
politifact9512: 2000 tweets
Found a total of 128 retweets.


Rate limit reached. Sleeping for: 870


Found a total of 85 retweets.
politifact6730: 1542 tweets
Found a total of 2 retweets.
Found a total of 1 retweets.
politifact7888: 2000 tweets
Found a total of 20 retweets.
Found a total of 45 retweets.
politifact8130: 216 tweets
Found a total of 4 retweets.
Found a total of 7 retweets.
politifact215: 662 tweets
Found a total of 1 retweets.
Found a total of 6 retweets.
politifact5608: 3 tweets
politifact783: 6 tweets
politifact1212: 1313 tweets
Found a total of 1 retweets.
Found a total of 13 retweets.
politifact2624: 815 tweets
Found a total of 7 retweets.


Rate limit reached. Sleeping for: 857
