In [None]:
# created on Dec 24, 2020
# modified on April 14, 2021
# modified on Jan 2, 2021
# modified on Jan 22, 2023
# @author:          Bo Zhao
# @email:           zhaobo@uw.edu
# @website:         https://hgis.uw.edu
# @organization:    Department of Geography, University of Washington, Seattle
# @description:     Search historical tweets using locational information

In [None]:
import tweepy
import pandas as pd

In [None]:
# the file path where to store the output csv on google drive
output_file = '/gdrive/My Drive/twsearch-result.csv'

# Apply for your own Twitter API keys at https://developer.twitter.com/en/apply-for-access
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit=True)

In [55]:
# Define the search term and the date_since date as variables
search_words = "guns"
# make sure there is no space between lat, long and the radius.
location = "39.09371548242884,-94.60596159281076,25.0mi"
date_since = "2023-1-16"
# read the Twitter API document to look for other ways to customize your queries.
# refer to https://developer.twitter.com/en/docs/twitter-api/v1/rules-and-filtering/search-operators
# for example: you can ignore all the retweets by #wildfires -filter:retweets
# Geolocalization: the search operator “near” isn’t available in the API, but there is a more precise way to restrict
# your query by a given location using the geocode parameter specified with the template “latitude,longitude,radius”,
# for example, “47.6138893,-122.3107869,10mi” (capitol hill at Seattle). When conducting geo searches, the search API will first attempt to find Tweets、
# which have lat/long within the queried geocode, and in case of not having success, it will attempt to find Tweets created
# by users whose profile location can be reverse geocoded into a lat/long within the queried geocode, meaning that is possible
# to receive Tweets which do not include lat/long information.


# Collect tweets
# tweets = tweepy.Cursor(api.search, q=search_words, lang="en", since=date_since).items(100)
tweets = tweepy.Cursor(api.search, q=search_words, geocode=location, lang="en", since=date_since).items(1000)

# create an array to store the result
result = []

# Iterate and print tweets
for tweet in tweets:
    row = {
        'username': tweet.author.name,
        'userid': tweet.author.id,
        'profile_location': tweet.author.location,
        'created_at': str(tweet.created_at),
        'text': tweet.text,
        'retweet_count': tweet.retweet_count,
        'source': tweet.source,
        'coordinates': tweet.coordinates
    }
    result.append(row)
    print(row)

# Store the results as a pandas dataframe
df = pd.DataFrame(result)

# notify the completion of the crawling in the console.
print("the crawling task is finished.")

{'username': 'Majestic Feline', 'userid': 1597419296438861825, 'profile_location': 'Kansas City MO', 'created_at': '2023-01-22 21:12:56', 'text': "@katiehobbs Guns don't kill people. People kill people. Try locking up violent criminals before assaulting rights.", 'retweet_count': 0, 'source': 'Twitter Web App', 'coordinates': None}
{'username': 'Brian W. Peterson', 'userid': 2902244659, 'profile_location': 'Kansas City', 'created_at': '2023-01-22 21:06:48', 'text': "@SirHardHarry It's not just a US problem. Dig into the stats and you'll see the press says this despite the facts.… https://t.co/zuL6BQ0CmT", 'retweet_count': 0, 'source': 'Twitter Web App', 'coordinates': None}
{'username': 'Joni Lane', 'userid': 825946830, 'profile_location': 'Kansas City, MO', 'created_at': '2023-01-22 20:57:56', 'text': '@Raquel_MiamiBch I agree. They cannot call themselves pro-life and support the death penalty and guns guns guns.', 'retweet_count': 0, 'source': 'Twitter for Android', 'coordinates': No

In [56]:
# Create data on to Google Drive
from google.colab import drive
# Mount your Drive to the Colab VM.
drive.mount('/gdrive')
  
df.to_csv(output_file, index=False)

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [57]:
# download the csv to your local computer
from google.colab import files
files.download(output_file)
print("the csv has been downloaded to your local computer. The program has been completed successfully.")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

the csv has been downloaded to your local computer. The program has been completed successfully.
