In [1]:
import tweepy
from tweepy import OAuthHandler
import json
import datetime as dt
import time
import os
import sys
import twitterKeys as twitterKeys




In [2]:
'''
In order to use this script you should register a data-mining application
with Twitter.  Good instructions for doing so can be found here:
http://marcobonzanini.com/2015/03/02/mining-twitter-data-with-python-part-1/
After doing this, store your unique consumer key, consumer secret,
access token, and access secret in a local twitterKeys module
(twitterKeys.py), which the load_api() function below reads.
The main() function can be run by executing the command: 
python twitter_search.py
I used Python 3 and tweepy version 3.5.0.  You will also need the other
packages imported above.
'''

'\nIn order to use this script you should register a data-mining application\nwith Twitter.  Good instructions for doing so can be found here:\nhttp://marcobonzanini.com/2015/03/02/mining-twitter-data-with-python-part-1/\nAfter doing this you can copy and paste your unique consumer key,\nconsumer secret, access token, and access secret into the load_api()\nfunction below.\nThe main() function can be run by executing the command: \npython twitter_search.py\nI used Python 3 and tweepy version 3.5.0.  You will also need the other\npackages imported above.\n'

In [3]:
def load_api():
    ''' Build and return an authorized tweepy API client.

        The four OAuth credentials are read from the imported
        twitterKeys module (apiKey, apiSecret, accessToken,
        accessSecret) rather than being hard-coded here. '''

    # read all credentials up front
    key, secret = twitterKeys.apiKey, twitterKeys.apiSecret
    token, token_secret = twitterKeys.accessToken, twitterKeys.accessSecret

    handler = OAuthHandler(key, secret)
    handler.set_access_token(token, token_secret)

    # both rate-limit flags are switched on when constructing the client
    return tweepy.API(handler,
                      wait_on_rate_limit=True,
                      wait_on_rate_limit_notify=True)




In [4]:

def tweet_search(api, query, max_tweets, max_id, since_id, geocode):
    ''' Collect up to 'max_tweets' tweets matching 'query', paging
        backwards (towards older tweets) from 'max_id'.

        Parameters:
            api        -- object exposing a tweepy-style search(**kwargs)
            query      -- the search string
            max_tweets -- stop once this many tweets have been collected
            max_id     -- only tweets with an ID below this are requested;
                          None or a non-positive value (main() uses -1)
                          means 'no upper bound' and the parameter is
                          left out of the request
            since_id   -- lower ID bound passed to the search; left out
                          of the request when None
            geocode    -- currently unused: the geocode filter is
                          disabled (see the commented line below)

        Returns a tuple (tweets, max_id): the list of status objects
        found and the ID of the last tweet received, which is the value
        to pass as 'max_id' on the next call. If nothing was received,
        'max_id' is returned unchanged. '''

    searched_tweets = []
    while len(searched_tweets) < max_tweets:
        search_args = {
            'q': query,
            'count': max_tweets - len(searched_tweets),
            'tweet_mode': 'extended',
            # 'geocode': geocode,
        }
        if since_id is not None:
            search_args['since_id'] = str(since_id)
        # Only send an upper bound when we actually have one; the -1
        # sentinel used to be sent to the API as the bogus value "-2".
        if max_id is not None and max_id > 0:
            # minus one so the tweet with ID == max_id is not fetched again
            search_args['max_id'] = str(max_id - 1)

        try:
            new_tweets = api.search(**search_args)
        except tweepy.TweepError as err:
            # Report the failure instead of hiding it, then hand back
            # whatever was collected so far.
            print('search failed at', dt.datetime.now(), '-', err)
            break

        print('found', len(new_tweets), 'tweets')
        if not new_tweets:
            print('no tweets found')
            break
        searched_tweets.extend(new_tweets)
        # continue the next page below the last tweet received
        max_id = new_tweets[-1].id
    return searched_tweets, max_id




In [5]:
def get_tweet_id(api, date='', days_ago=9, query='a'):
    ''' Return the ID of a tweet that can be used as a 'starting point'
        (or end point) for a search.

        Parameters:
            api      -- object exposing a tweepy-style search(**kwargs)
            date     -- optional date/datetime; when given, the search
                        uses the day after it as the 'until' date
            days_ago -- used when 'date' is empty: the 'until' date is
                        today minus this many days. The default (9) is,
                        per the original author, the furthest back the
                        search can reach.
            query    -- search string; required by the API, defaults to
                        a commonly used word

        Returns the ID of the first tweet in the search result.
        Raises IndexError if the search returns no tweets. '''

    if date:
        # 'until' is the day after 'date'
        td = date + dt.timedelta(days=1)
        count = 1
    else:
        # __ days ago, counted from now
        td = dt.datetime.now() - dt.timedelta(days=days_ago)
        # ask for up to 10 tweets; only the first one is used
        count = 10

    tweet_date = '{0}-{1:0>2}-{2:0>2}'.format(td.year, td.month, td.day)
    tweets = api.search(q=query, count=count, until=tweet_date,
                        tweet_mode='extended')
    if not tweets:
        raise IndexError('no tweets matching {0!r} found before {1}'.format(
            query, tweet_date))

    if not date:
        print('search limit (start/stop):', tweets[0].created_at)
    # Both branches return the ID; previously the 'date' branch fell
    # through and returned None.
    return tweets[0].id


    

In [6]:

def write_tweets(tweets, filename):
    ''' Append the given tweets to 'filename', one JSON object per line.
        Only each tweet's id and full text are stored, under the keys
        "id" and "text". '''

    with open(filename, 'a') as out:
        for status in tweets:
            payload = status._json
            record = {"id": payload['id'], "text": payload['full_text']}
            out.write(json.dumps(record) + '\n')




In [7]:

def _phrase_slug(phrase):
    ''' Turn a search phrase into a file-system friendly name that is
        distinct per phrase, e.g. "illegal alien" -> "illegal_alien". '''
    slug = ''.join(ch if ch.isalnum() else '_' for ch in phrase).strip('_')
    return slug or 'search'


def _day_label(min_days_old, max_days_old):
    ''' Build the date part of the output file name: a single
        YYYY-MM-DD day when the window spans one day, otherwise
        "<oldest>_to_<newest>". '''
    now = dt.datetime.now()
    newest = now - dt.timedelta(days=min_days_old)
    if max_days_old - min_days_old == 1:
        return '{0}-{1:0>2}-{2:0>2}'.format(newest.year, newest.month, newest.day)
    oldest = now - dt.timedelta(days=max_days_old - 1)
    return '{0}-{1:0>2}-{2:0>2}_to_{3}-{4:0>2}-{5:0>2}'.format(
        oldest.year, oldest.month, oldest.day,
        newest.year, newest.month, newest.day)


def _last_saved_id(json_file):
    ''' Return the tweet ID stored on the last non-blank line of
        'json_file', or None when the file holds no records. '''
    last_line = None
    with open(json_file, 'r') as f:
        for line in f:
            if line.strip():
                last_line = line
    if last_line is None:
        return None
    return json.loads(last_line)['id']


def main():
    ''' Search for tweets created within a window of days, once per
        search phrase, and append them to one JSON-lines file per
        phrase and window. The search phrases, windows and limits can
        be changed below.

        Files are written to '<slug>/<slug>_<dates>.json', where <slug>
        is derived from the whole search phrase so that phrases sharing
        a first word (e.g. "illegal alien" / "illegal immigrant") do not
        end up in, or resume from, each other's file. '''

    # ---- search variables ----
    search_phrases = ["illegal alien", "illegal immigrant", "send them back"]
    # other phrases that have been considered:
    #   "illegal immigration", "anti-american", "anti immigrant", "ban immigrants",
    #   "go back to where they came from", "ban immigrants from",
    #   '''"immigrants from" "are terrorists"''', '''"immigrants from" "are criminals"''',
    #   '#banimmigrants'

    time_limit = 0.5  # runtime limit in hours, applied to each phrase/window search
    max_tweets = 100  # number of tweets per search (will be
    # iterated over) - maximum is 100
    USA = '39.8,-95.583068847656,2500km'  # this geocode includes nearly all American
    # states (and a large portion of Canada)

    # (min_days_old, max_days_old) search windows; min_days_old=0 searches
    # from right now. zip() stops at the shorter range, so the windows
    # used are (0, 8), (1, 9) and (2, 10).
    mindays = range(0, 6)
    maxdays = range(8, 11)

    # authorize and load the twitter API once; it does not depend on the
    # phrase or the window
    api = load_api()

    # loop over windows and search items, creating a new file for each
    for min_days_old, max_days_old in zip(mindays, maxdays):
        for search_phrase in search_phrases:

            print('Search phrase =', search_phrase)

            # work out the file in which to store the tweets
            name = _phrase_slug(search_phrase)
            os.makedirs(name, exist_ok=True)
            day = _day_label(min_days_old, max_days_old)
            json_file = os.path.join(name, name + '_' + day + '.json')

            # set the 'starting point' ID for tweet collection
            max_id = None
            if os.path.isfile(json_file):
                print('Appending tweets to file named: ', json_file)
                # resume below the last tweet already saved (None if the
                # file exists but is empty)
                max_id = _last_saved_id(json_file)
                if max_id is not None:
                    print('Searching from the bottom ID in file')
            if max_id is None:
                if min_days_old == 0:
                    # -1 means 'no upper bound': start from the newest tweets
                    max_id = -1
                else:
                    # get the ID of a tweet that is min_days_old
                    max_id = get_tweet_id(api, days_ago=(min_days_old - 1))

            # set the smallest ID to search for
            since_id = get_tweet_id(api, days_ago=(max_days_old - 1))
            print('max id (starting point) =', max_id)
            print('since id (ending point) =', since_id)

            # ---- tweet gathering loop ----
            end = dt.datetime.now() + dt.timedelta(hours=time_limit)
            count, exitcount = 0, 0
            while dt.datetime.now() < end:
                count += 1
                print('count =', count)
                # collect tweets and update max_id
                tweets, max_id = tweet_search(api, search_phrase, max_tweets,
                                              max_id=max_id, since_id=since_id,
                                              geocode=USA)
                # write tweets to file in JSON format
                if tweets:
                    write_tweets(tweets, json_file)
                    exitcount = 0
                else:
                    # give up on this phrase after three empty searches in a row
                    exitcount += 1
                    if exitcount == 3:
                        print('Maximum number of empty tweet strings reached - breaking')
                        break




In [8]:
# Entry point: start the tweet collection when this code is executed as
# the main module.
if __name__ == "__main__":
    main()

Search phrase = illegal alien
search limit (start/stop): 2022-11-09 23:59:59
max id (starting point) = -1
since id (ending point) = 1590494443723198465
count = 1
found 73 tweets
found 16 tweets
found 8 tweets
found 0 tweets
no tweets found
count = 2
found 67 tweets
found 12 tweets
found 10 tweets
found 8 tweets
found 2 tweets
found 1 tweets
count = 3
found 69 tweets
found 15 tweets
found 9 tweets
found 4 tweets
found 2 tweets
found 1 tweets
count = 4
found 58 tweets
found 23 tweets
found 7 tweets
found 6 tweets
found 2 tweets
found 0 tweets
no tweets found
count = 5
found 46 tweets
found 15 tweets
found 7 tweets
found 11 tweets
found 9 tweets
found 2 tweets
found 2 tweets
found 2 tweets
found 1 tweets
found 1 tweets
found 0 tweets
no tweets found
count = 6
found 27 tweets
found 20 tweets
found 27 tweets
found 10 tweets
found 6 tweets
found 3 tweets
found 0 tweets
no tweets found
count = 7
found 17 tweets
found 8 tweets
found 47 tweets
found 27 tweets
found 1 tweets
count = 8
found 77 t

Rate limit reached. Sleeping for: 829


found 1 tweets
count = 44


KeyboardInterrupt: 

In [None]:
# Change search queries
# Change max number of tweets desired
# Change minimum number of tweets desired
# Enter Twitter keys

In [None]:
# test out program to see if you get tweets that match searches exactly. 
# Try with 1 search query. 
# If it doesn't match exactly, try it with triple quotes. --> ''' "i hate immigrants from"  '''