In [1]:
import tweepy, sys, os, logging, json

# Twitter API using Tweepy

Set up activity log file:

In [2]:
# Send log records of level ERROR and above to a file that is truncated
# (filemode 'w') each time this cell configures logging.
logging.basicConfig(
    level=logging.ERROR,
    filename='twitter_error_log.log',
    filemode='w',
)

Load consumer key and secret securely:

In [3]:
# Read the credentials from the environment so they are never saved inside the
# notebook. Each value falls back to an empty string (the previous hard-coded
# value) when its variable is not set.
consumer_key = os.environ.get('TWITTER_CONSUMER_KEY', '')
consumer_secret = os.environ.get('TWITTER_CONSUMER_SECRET', '')

Initialize API

In [4]:
# Build the auth handler from the consumer key/secret pair only.
auth = tweepy.AppAuthHandler(consumer_key, consumer_secret)

# The JSON parser makes API calls return plain parsed JSON; the two
# wait_on_rate_limit* flags are passed through to tweepy as-is.
api = tweepy.API(
    auth,
    parser=tweepy.parsers.JSONParser(),
    wait_on_rate_limit=True,
    wait_on_rate_limit_notify=True,
)

# Stop the notebook early if no usable API object came back.
if not api:
    print("Can't Authenticate")
    sys.exit(-1)

Define Twitter search queries

In [6]:
# Candidate query strings for the Twitter search API, one per topic.
climate_search = '(global AND warming) OR #climate'
victoria_search = 'victoria secret'
news_search = '(trump OR clinton) AND (health OR obamacare)'
facebook_search = 'facebook AND (data OR email OR user OR emails OR documents OR confidential)'
family_sep_search = 'family separation'
basic_income_search = '(universal AND basic AND income) OR (basic AND income)'
democracy_search = 'democracy'

# The query that the download cell will actually run.
searchQuery = democracy_search

### Running the queries for all the tweets on a certain topic

In [7]:
maxTweets = 1000000        # Some arbitrary large number
tweetsPerQry = 100         # this is the max the API permits
flushThreshold = 4000      # write the buffer to disk once it holds more than this many tweets
maxConsecutiveErrors = 5   # give up after this many failed queries in a row

# If results from a specific ID onwards are required, set sinceId to that ID.
# Otherwise default to no lower limit and go as far back as the API allows.
sinceId = None

# If only results below a specific ID are required, set max_id to that ID.
# Otherwise default to no upper limit and start from the most recent tweet
# matching the search query.
max_id = -1


def _save_tweets(batch, file_index):
    """Dump `batch` as JSON to 'tweets<file_index>.json' and log that it was saved."""
    with open('tweets' + str(file_index) + '.json', 'w') as outfile:
        json.dump(batch, outfile)
    logging.error("JSON file saved")


tweetCount = 0          # tweets currently buffered in `tweets`, not yet written
tweetFiles = 0          # number of JSON files written so far
totalTweets = 0         # every tweet downloaded, buffered or already written
consecutiveErrors = 0   # failed queries since the last successful one
tweets = []

# Progress messages use logging.error because the log file is configured at
# ERROR level; anything lower would not be recorded.
logging.error("Downloading max {0} tweets".format(maxTweets))

# Track the real total: files are flushed only after the buffer exceeds
# flushThreshold, so "files * threshold" would undercount what was downloaded.
while totalTweets < maxTweets:
    # Only pass the ID bounds that are actually set.
    searchArgs = {'q': searchQuery, 'count': tweetsPerQry}
    if max_id > 0:
        # max_id is inclusive in the request, so step below the oldest tweet seen.
        searchArgs['max_id'] = str(max_id - 1)
    if sinceId:
        searchArgs['since_id'] = sinceId

    try:
        new_tweets = api.search(**searchArgs)['statuses']
    except tweepy.TweepError as e:
        logging.error('Query failed when max_id equaled {0}: {1}'.format(max_id, e))
        # Retry a bounded number of times; a persistent failure must not
        # keep this loop spinning forever.
        consecutiveErrors += 1
        if consecutiveErrors >= maxConsecutiveErrors:
            logging.error('Giving up after {0} consecutive failed queries'.format(consecutiveErrors))
            break
        continue

    consecutiveErrors = 0

    if len(new_tweets) == 0:
        logging.error('No tweets found')
        break

    tweets.extend(new_tweets)
    tweetCount += len(new_tweets)
    totalTweets += len(new_tweets)
    logging.error("Downloaded {0} tweets".format(totalTweets))

    # The next request continues from just below the last tweet of this page.
    max_id = new_tweets[-1]['id']

    if tweetCount > flushThreshold:
        _save_tweets(tweets, tweetFiles)
        tweetFiles += 1
        tweetCount = 0
        del tweets[:]

# Write whatever is still buffered after the loop ends.
if tweetCount > 0:
    _save_tweets(tweets, tweetFiles)
    tweetFiles += 1
    tweetCount = 0
    del tweets[:]

KeyboardInterrupt: 

# Check the result of one file

In [3]:
# Read the first saved batch back from disk to inspect what was written.
with open('tweets0.json') as saved_batch:
    test = json.load(saved_batch)

In [5]:
# Number of entries in the list loaded from 'tweets0.json'.
print(len(test))
# Display the first entry as the cell's output.
test[0]

4047


{'contributors': None,
 'coordinates': None,
 'created_at': 'Wed Nov 28 00:14:31 +0000 2018',
 'entities': {'hashtags': [],
  'symbols': [],
  'urls': [],
  'user_mentions': [{'id': 187289134,
    'id_str': '187289134',
    'indices': [3, 18],
    'name': 'Clint Watts',
    'screen_name': 'selectedwisdom'}]},
 'favorite_count': 0,
 'favorited': False,
 'geo': None,
 'id': 1067572409933287425,
 'id_str': '1067572409933287425',
 'in_reply_to_screen_name': None,
 'in_reply_to_status_id': None,
 'in_reply_to_status_id_str': None,
 'in_reply_to_user_id': None,
 'in_reply_to_user_id_str': None,
 'is_quote_status': True,
 'lang': 'en',
 'metadata': {'iso_language_code': 'en', 'result_type': 'recent'},
 'place': None,
 'quoted_status_id': 1065624061055721473,
 'quoted_status_id_str': '1065624061055721473',
 'retweet_count': 3210,
 'retweeted': False,
 'retweeted_status': {'contributors': None,
  'coordinates': None,
  'created_at': 'Thu Nov 22 16:00:11 +0000 2018',
  'entities': {'hashtags': [

# Check rate limit usage

In [28]:
# NOTE(review): `requests` is not referenced in any of the visible cells — confirm it is still needed.
import requests
from application_only_auth import Client
# Build a client from the same consumer key/secret pair that the tweepy auth handler uses.
client = Client(consumer_key, consumer_secret)

In [29]:
# Query the rate-limit status endpoint, restricted to the help, users,
# search and statuses resource families.
rate_limit_url = "https://api.twitter.com/1.1/application/rate_limit_status.json?resources=help,users,search,statuses"
response = client.request(rate_limit_url)

In [30]:
# Display the rate-limit status returned by the request.
# NOTE(review): the recorded output includes an application identifier under
# 'rate_limit_context' (looks like the consumer key — confirm); clear or redact
# it before sharing the notebook.
response

{'rate_limit_context': {'application': 'rQL8q2gfgXYUNVutobvFaRbO4'},
 'resources': {'help': {'/help/configuration': {'limit': 15,
    'remaining': 15,
    'reset': 1543800780},
   '/help/languages': {'limit': 15, 'remaining': 15, 'reset': 1543800780},
   '/help/privacy': {'limit': 15, 'remaining': 15, 'reset': 1543800780},
   '/help/settings': {'limit': 15, 'remaining': 15, 'reset': 1543800780},
   '/help/tos': {'limit': 15, 'remaining': 15, 'reset': 1543800780}},
  'search': {'/search/tweets': {'limit': 450,
    'remaining': 447,
    'reset': 1543800663}},
  'statuses': {'/statuses/lookup': {'limit': 300,
    'remaining': 300,
    'reset': 1543800780},
   '/statuses/oembed': {'limit': 180, 'remaining': 180, 'reset': 1543800780},
   '/statuses/retweeters/ids': {'limit': 300,
    'remaining': 300,
    'reset': 1543800780},
   '/statuses/retweets/:id': {'limit': 300,
    'remaining': 300,
    'reset': 1543800780},
   '/statuses/show/:id': {'limit': 900, 'remaining': 897, 'reset': 1543800