8.4 Twitter: Collecting Tweets

In [None]:
''' Provides functions that connect to Twitter through the tweepy package.
    Usage is shown in the main test program at the bottom of this cell.
'''

import tweepy

# login to Twitter with ordinary rate limiting
def oauth_login():
  ''' Create a tweepy API object authorized through tweepy.OAuthHandler
        (ordinary rate limiting).
      Relies on CONSUMER_KEY, CONSUMER_SECRET, OAUTH_TOKEN and OAUTH_SECRET
        already being defined in the surrounding namespace.
      Returns whatever tweepy.API produces; prints a message first if that
        object is falsy.
  '''
  # hand the consumer credentials, then the access token pair, to tweepy
  handler = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
  handler.set_access_token(OAUTH_TOKEN, OAUTH_SECRET)

  client = tweepy.API(handler)
  if client:
    return client

  # nothing usable came back: report it, but still hand the value to the caller
  print ("Problem Connecting to API with OAuth")
  return client

# login to Twitter with extended rate limiting
#  must be used with the Tweepy Cursor to wrap the search and enact the waits
def appauth_login():
  ''' Create a tweepy API object authorized through tweepy.AppAuthHandler
        (extended rate limiting).
      Only CONSUMER_KEY and CONSUMER_SECRET are used here; they must already be
        defined in the surrounding namespace. No access token is set.
      The API object is built with wait_on_rate_limit=True
        (wait_on_rate_limit_notify is intentionally not passed).
      Returns whatever tweepy.API produces; prints a message first if that
        object is falsy.
  '''
  handler = tweepy.AppAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
  client = tweepy.API(handler, wait_on_rate_limit=True)

  if client:
    return client

  # nothing usable came back: report it, but still hand the value to the caller
  print ("Problem Connecting to API with AppAuth")
  return client
    
# Test program to show how to connect
if __name__ == '__main__':
  # run each login function in turn and show the API object it returns
  for label, login in (("Twitter OAuthorization: ", oauth_login),
                       ("Twitter AppAuthorization: ", appauth_login)):
    tweepy_api = login()
    print (label, tweepy_api)

Twitter OAuthorization:  <tweepy.api.API object at 0x7fa920297640>
Twitter AppAuthorization:  <tweepy.api.API object at 0x7fa931f14190>


In [12]:
# Change to directory where the 'twitter_login_fn' python script is located
# NOTE(review): the path below is an absolute location in one user's home
#   directory; edit it to wherever the script lives on your machine.

%cd /Users/pergolicious/Downloads/

/Users/pergolicious/Downloads


In [19]:
''' 
Main topic search program for Twitter using the Python tweepy package
      Tries to get up to the requested number of results (for example, 1000)
        from the Twitter REST/Search API search function,
        using the tweepy Cursor to repeat the Twitter search API requests
      The query string may be a keyword or hashtag, or a set of them connected by OR
        example:  query = "#CuseLAX OR CNYlacrosse"
        some queries require quotes on the command line
    Returns a list of JSON-formatted tweets
'''

import tweepy
import json
import sys
from twitter_login_fn import oauth_login
from twitter_login_fn import appauth_login
from DB_fn import save_to_DB


'''
  Uses the tweepy Cursor to wrap a twitter api search for the query string
    Returns json formatted results
'''

def simple_search(api, query, max_results=20):
  ''' Search Twitter for the query string and return the results as JSON.

      api:          tweepy API object (for example from appauth_login)
      query:        search string passed to the search call as q
      max_results:  maximum number of items requested from the Cursor (default 20)

      Returns a list holding the _json attribute of every status the Cursor yields.
  '''
  # tweepy 4.x renamed API.search to API.search_tweets; use whichever exists
  search_method = getattr(api, 'search_tweets', None)
  if search_method is None:
    search_method = api.search

  # the Cursor needs the bound method itself (not the result of calling it)
  #   so it can repeat the request and page through additional tweets
  cursor = tweepy.Cursor(search_method, q=query)

  # for each tweet, keep only the json representation
  return [status._json for status in cursor.items(max_results)]

# use a main so can get command line arguments
def _clean_db_identifier(name):
    ''' Lowercase a database or collection name and strip '#' and space characters '''
    return name.lower().replace('#', '').replace(' ', '')


if __name__ == '__main__':
    # usage: python twitter_simple_search.py <query> <num tweets> <DB name> <collection name>
    USAGE = 'usage: python twitter_simple_search.py "#CuseLAX" 100 lax syracuselax'

    # Make a list of command line arguments, omitting the [0] element
    # which is the script itself.
    args = sys.argv[1:]
    if len(args) < 4:
        print(USAGE)
        sys.exit(1)

    query = args[0]
    # a non-numeric tweet count gets the usage message instead of a raw traceback
    try:
        num_tweets = int(args[1])
    except ValueError:
        print('<num tweets> must be an integer, got:', args[1])
        print(USAGE)
        sys.exit(1)
    DBname = args[2]
    DBcollection = args[3]

    # api = oauth_login()
    # if needed switch to using the appauth login to avoid rate limiting
    api = appauth_login()
    print ("Twitter Authorization: ", api)

    # access Twitter search
    result_tweets = simple_search(api, query, max_results=num_tweets)
    print ('Number of result tweets: ', len(result_tweets))

    # save the results in a database collection
    #   change names to lowercase because they are not case sensitive
    #   and remove special characters like hashtags and spaces (other special characters may also be forbidden)
    DBname = _clean_db_identifier(DBname)
    DBcollection = _clean_db_identifier(DBcollection)

    # use the save function imported from DB_fn
    save_to_DB(DBname, DBcollection, result_tweets)

    # Done!
  

usage: python twitter_simple_search.py "#CuseLAX" 100 lax syracuselax


SystemExit: 1

In [18]:
import pymongo

# connect to the MongoDB server at localhost, port 27017
client = pymongo.MongoClient('localhost', 27017)

# last expression in the cell, so the notebook displays the database names
client.list_database_names()

['admin', 'bball', 'bikes', 'config', 'local', 'peopledb', 'usgs']