In [1]:
import GetOldTweets3 as got3
import pandas as pd
from datetime import datetime
from dateutil.relativedelta import relativedelta
import time

I will extract top tweets for the past 10 years. 100 top tweets will be extracted each month on the specified topic for past 10 years

In [12]:

def get_tweets(topic, end_date, years=10):
    columns = ['id', 'url', 'author', 'retweets', 'favorites', 'mentions', 'hashtags', 'geo', 'time', 'text']
    text_query = topic
    
    # convert the end_date string to the datetime object
    end_date_datetime = datetime.strptime(end_date, '%Y-%m-%d')

    # This section is to create a list of datetime objects that are 1 month apart going backwards
    date_range = [end_date_datetime]
    total_months = 12 * years
    
    prior_date = end_date_datetime
    
    while total_months > 1:
        prior_date = prior_date - relativedelta(months=1)
        date_range.append(prior_date)
        total_months -= 1
        
    # Convert the elements in the list from datetime objects to string
    date_range_string = [twitter_date.strftime('%Y-%m-%d') for twitter_date in date_range][::-1]
    print("Start querying data...")
    
    # Initialize an empty list to store dataframe from each iteration
    compiled_tweets_df = pd.DataFrame(columns=columns)
    
    
    for i in range(len(date_range_string)-1):
        tweetCriteria = got3.manager\
                .TweetCriteria()\
                .setQuerySearch(text_query)\
                .setLang('en')\
                .setSince(date_range_string[i])\
                .setUntil(date_range_string[i+1])\
                .setTopTweets(True)\
                .setMaxTweets(100)

        tweets = got3.manager.TweetManager.getTweets(tweetCriteria)

        text_tweets = [[tweet.id, tweet.permalink, tweet.username, tweet.retweets, 
                tweet.favorites, tweet.mentions, tweet.hashtags, tweet.geo, 
                tweet.date, tweet.text] for tweet in tweets]

        sample_tweets = pd.DataFrame(text_tweets, columns=columns)
        compiled_tweets_df = pd.concat([compiled_tweets_df, sample_tweets])
        print("Iteration {} complete!".format(i+1))
        print("Pause the operation...")
        time.sleep(5)
        print("Begin new iteration...")
        
    return compiled_tweets_df # concatenate the list of dataframe into a single dataframe
    print("Operation complete!")
    

In [10]:
tweets = get_tweets("immigration", "2019-12-01")

Start querying data...
Iteration 1 complete!
Pause the operation...
Begin new iteration...
Iteration 2 complete!
Pause the operation...
Begin new iteration...
Iteration 3 complete!
Pause the operation...
Begin new iteration...
Iteration 4 complete!
Pause the operation...
Begin new iteration...
Iteration 5 complete!
Pause the operation...
Begin new iteration...


In [11]:
tweets

Unnamed: 0,id,url,author,retweets,favorites,mentions,hashtags,geo,time,text
0,1156716125146755072,https://twitter.com/ResisterSis20/status/11567...,ResisterSis20,125,113,@One_Voice_1,#OneVoiceImmigration,,2019-07-31 23:59:49+00:00,STAY OUT NO ENTRANCE ALLOWED DESTROY THE STATU...
1,1156715188084727808,https://twitter.com/TitaniaMcGrath/status/1156...,TitaniaMcGrath,419,2947,,,,2019-07-31 23:56:06+00:00,This is an amazing coincidence. Only the other...
2,1156713962014150657,https://twitter.com/BillTufts/status/115671396...,BillTufts,65,125,,,,2019-07-31 23:51:13+00:00,"As Quebec cuts immigration, premier calls for ..."
3,1156711449097641986,https://twitter.com/NOOBSTRUCTION/status/11567...,NOOBSTRUCTION,196,286,,,,2019-07-31 23:41:14+00:00,Entitlements are out of control for 2 reasons....
4,1156711141248315399,https://twitter.com/kylegriffin1/status/115671...,kylegriffin1,2392,8385,,,,2019-07-31 23:40:01+00:00,"Yazmin Juárez, the woman whose 1-year-old daug..."
...,...,...,...,...,...,...,...,...,...,...
95,1200750112915501058,https://twitter.com/termc1/status/120075011291...,termc1,42,28,@HouseDemocrats @SenateGOP @SenateDems @HouseGOP,#immigration #BuildTheWall #AmericaFirst #Trum...,,2019-11-30 12:15:10+00:00,"We don't want these stabbing, raping, self-det..."
96,1200748434220888064,https://twitter.com/Steeper33/status/120074843...,Steeper33,39,87,,#DefundCBC,,2019-11-30 12:08:30+00:00,Forget about the billions of tax dollars leavi...
97,1200747578092072961,https://twitter.com/peoplesvote_uk/status/1200...,peoplesvote_uk,54,83,,,,2019-11-30 12:05:06+00:00,Our NHS relies on immigration to sustain itsel...
98,1200746661338849281,https://twitter.com/JohnFromCranber/status/120...,JohnFromCranber,60,178,,,,2019-11-30 12:01:27+00:00,My view: Merkel is a Marxist Subversive. She’s...


In [5]:
tweets