In [1]:
# created on Dec 24, 2020
# @author:          Bo Zhao
# @email:           zhaobo@uw.edu
# @website:         https://hgis.uw.edu
# @organization:    Department of Geography, University of Washington, Seattle
# @description:     Search geo-tagged tweets within the U.S. This script is modified from https://github.com/shawn-terryah/Twitter_Geolocation

!python -m pip install tweepy
import tweepy, json, time

Collecting tweepy
  Downloading tweepy-3.10.0-py2.py3-none-any.whl (30 kB)
Collecting requests-oauthlib>=0.7.0
  Downloading requests_oauthlib-1.3.0-py2.py3-none-any.whl (23 kB)
Installing collected packages: requests-oauthlib, tweepy
Successfully installed requests-oauthlib-1.3.0 tweepy-3.10.0


In [2]:
class StreamListener(tweepy.StreamListener):
    """tweepy.StreamListener is a class provided by tweepy used to access
    the Twitter Streaming API to collect tweets in real-time.
    """

    def __init__(self, time_limit=60, file=""):
        """class initialization"""
        self.start_time = time.time()
        self.limit = time_limit
        self.f = open(file, 'a', encoding="utf-8")
        super(StreamListener, self).__init__()

    def on_data(self, data):
        """This is called when data are streamed in."""
        if (time.time() - self.start_time) < self.limit:
            datajson = json.loads(data)
            print(datajson, "\n")
            if 'id' not in datajson.keys():
                time.sleep(10)
            else:
                # {'limit': {'track': 13, 'timestamp_ms': '1585851016736'}}

                id = datajson['id']
                username = datajson['user']['screen_name']
                created_at = datajson['created_at']
                text = datajson['text'].strip().replace("\n", "")

                # process the geo-tags
                if datajson['coordinates'] == None:
                    try:
                        bbox = datajson['place']['bounding_box']['coordinates'][0]
                        lng = (bbox[0][0] + bbox[2][0]) / 2.0
                        lat = (bbox[0][1] + bbox[1][1]) / 2.0
                    except:
                        lat = 0
                        lng = 0
                else:
                    lng = datajson['coordinates']['coordinates'][0]
                    lat = datajson['coordinates']['coordinates'][1]

                if lat != 0:
                    record = '%s, %s, %f, %f, %s \n' % (username, created_at, lng, lat, text)
                    print(record)
                    self.f.write(record)
                else:
                    pass
        else:
            self.f.close()
            print("finished")
            return False

In [4]:
if __name__ == "__main__":
    # These are provided to you through the Twitter API after you create a account
    # register a Twitter App to get the keys and access tokens.
    output_file = "assets/tweets.csv"

    # Apply for your own Twitter API keys at https://developer.twitter.com/en/apply-for-access
    consumer_key = "gbW0BTSRAAPN642HQZPYB164E"
    consumer_secret = "dVc3btbUohooa0lKvSXg90gIoAE46ruGwk5yilLDEfpMGh2V1G"
    access_token = "1134754734915461122-pem852CSzk95tSKlcxo9LfowN1gfrN"
    access_token_secret = "PnoBhyk7SbrDqGj0c99XLVQSRoG4YIbHbCbT5tA7LUejw"

    myauth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    myauth.set_access_token(access_token, access_token_secret)

    # LOCATIONS are the longitude, latitude coordinate corners for a box that restricts the
    # geographic area from which you will stream tweets. The first two define the southwest
    # corner of the box and the second two define the northeast corner of the box.
    LOCATIONS = [-124.7771694, 24.520833, -66.947028, 49.384472,  # Contiguous US
                 -164.639405, 58.806859, -144.152365, 71.76871,  # Alaska
                 -160.161542, 18.776344, -154.641396, 22.878623]  # Hawaii

    stream_listener = StreamListener(time_limit=60, file=output_file)
    stream = tweepy.Stream(auth=myauth, listener=stream_listener)
    stream.filter(locations=LOCATIONS, track=['covid'], is_async=True)

{'created_at': 'Mon Feb 01 23:35:00 +0000 2021', 'id': 1356385595250106368, 'id_str': '1356385595250106368', 'text': '@SenateDems @SenateFloor Screw the GOP &amp; pass the Bill', 'source': '<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>', 'truncated': False, 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': 73238146, 'in_reply_to_user_id_str': '73238146', 'in_reply_to_screen_name': 'SenateDems', 'user': {'id': 162759015, 'id_str': '162759015', 'name': 'tony calderon', 'screen_name': 'JAC0403', 'location': 'san antonio, texas', 'url': None, 'description': 'retired dude\nextremely liberal person, hopefully belonging to world', 'translator_type': 'none', 'protected': False, 'verified': False, 'followers_count': 223, 'friends_count': 454, 'listed_count': 10, 'favourites_count': 32599, 'statuses_count': 53853, 'created_at': 'Sun Jul 04 16:03:20 +0000 2010', 'utc_offset': None, 'time_zone': None, 'geo_enabled': True, 

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



{'created_at': 'Mon Feb 01 23:35:25 +0000 2021', 'id': 1356385698878730243, 'id_str': '1356385698878730243', 'text': 'RT @CoronavirusNewv: 🇪🇸 | ESPAÑA\n\nLas pruebas anales de COVID-19 ya llegaron también a España  https://t.co/e5Zgh39Joj', 'source': '<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>', 'truncated': False, 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 274240467, 'id_str': '274240467', 'name': 'jorgewilfredolabrada', 'screen_name': 'jorgewilfredo', 'location': None, 'url': None, 'description': None, 'translator_type': 'none', 'protected': False, 'verified': False, 'followers_count': 76, 'friends_count': 150, 'listed_count': 0, 'favourites_count': 22063, 'statuses_count': 22222, 'created_at': 'Wed Mar 30 00:38:08 +0000 2011', 'utc_offset': None, 'time_zone': None, 'geo_enabled': False, 'lang': None, 'contributor

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



{'created_at': 'Mon Feb 01 23:35:39 +0000 2021', 'id': 1356385757552902146, 'id_str': '1356385757552902146', 'text': 'RT @LuisVerdesotoo1: #URGENTE \n\nDenuncié ante el @TCE_Ecuador presuntas infracciones electorales cometidas en la #CampañaElectoralEc por el…', 'source': '<a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>', 'truncated': False, 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 325158179, 'id_str': '325158179', 'name': 'Raúl Fariño Bajaña', 'screen_name': 'rfarinobajana', 'location': 'Gquil', 'url': None, 'description': 'Azul x 100pre', 'translator_type': 'none', 'protected': False, 'verified': False, 'followers_count': 185, 'friends_count': 37, 'listed_count': 1, 'favourites_count': 499, 'statuses_count': 19162, 'created_at': 'Mon Jun 27 20:48:12 +0000 2011', 'utc_offset': None, 'time_zone': None, 'geo_enabled':