# Twitter Data Collection

Based on:
API demo – Twitter
by
(c) Nuno António 2020 - Rev. 1.00

### Load packages and do the initializations

In [195]:
# Load libraries
import os

import numpy as np
import pandas as pd
import tweepy

### Functions

### Search definitions
For details on how to build search queries and filters, check:
- https://developer.twitter.com/en/docs/twitter-api/v1/rules-and-filtering/build-standard-queries
- https://developer.twitter.com/en/docs/twitter-api/v1/rules-and-filtering/search-operators



In [197]:
# Terms to search.
# The query has three space-separated parts:
#   1. the official accounts, any one of which must be mentioned (OR group)
#   2. the vaccine-related keywords, any one of which must appear (OR group)
#   3. a negated `is:verified` clause
# NOTE(review): `is:verified` does not appear in the standard (v1.1) search
# operator list linked above - confirm this endpoint honours it.
searchQuery = (
    '(@joebiden OR @USAGov OR @POTUS OR @HHSvaccines OR @US_FDA OR @CDCgov OR @WhiteHouse) '
    # "AstraZeneca" was previously misspelled as "AstraZenca", so that keyword never matched
    '(vaccine OR vaccination OR vax OR moderna OR AstraZeneca OR Biontech OR JNJ) '
    '-(is:verified)'
)

In [198]:
# Earliest date (YYYY-MM-DD) from which tweets are requested; used as the search start date
beginDate = "2021-01-01"

In [199]:
# Language code handed to the search call so that only tweets in this language are returned
lang = "en"

In [200]:
# Query suffix that excludes retweets; it is appended to the search terms when the query text is built.
# NOTE: this name shadows Python's built-in `filter` for the rest of the notebook.
filter = " -filter:retweets"

### Do the search

For additional information on search parameters see:
- **Tweepy**: https://docs.tweepy.org/en/latest/api.html
- **Twitter**: https://developer.twitter.com/en/docs/twitter-api/v1/tweets/search/api-reference/get-search-tweets

In [201]:
# Access details.
# The credentials are read from environment variables so that secrets are never
# stored in the notebook itself. Set these four variables before starting the kernel:
#   TWITTER_API_KEY, TWITTER_API_KEY_SECRET,
#   TWITTER_ACCESS_TOKEN, TWITTER_ACCESS_TOKEN_SECRET
# A missing variable raises a KeyError naming the variable that is not set.
api_key = os.environ['TWITTER_API_KEY']
api_key_secret = os.environ['TWITTER_API_KEY_SECRET']
access_token = os.environ['TWITTER_ACCESS_TOKEN']
access_token_secret = os.environ['TWITTER_ACCESS_TOKEN_SECRET']

# Build the OAuth handler from the application key pair, then attach the user access token pair
auth = tweepy.OAuthHandler(api_key, api_key_secret)
auth.set_access_token(access_token, access_token_secret)

In [202]:
# Build the API client from the authenticated handler.
# `wait_on_rate_limit=True` asks tweepy to wait when the rate limit is exceeded instead of failing.
api = tweepy.API(
    auth,
    timeout=600,
    wait_on_rate_limit=True,
)

In [203]:
# Accumulator for the extracted tweets; the search loop appends one tuple per tweet
tws = list()

In [None]:

# Define the maximum number of tweets to retrieve
# The actual extraction was performed on Google Colab because the connection would time out eventually on the local machine.
max_tweets = 500000

# Query text: search terms + retweet filter + start date.
# The start date is expressed with the `since:` search operator (part of the
# standard query syntax) rather than as a separate request parameter.
q = searchQuery + ' ' + filter + ' since:' + beginDate

# Page through the results, 100 tweets per request.
# `count` is the page-size parameter of the search endpoint (the previous `rpp` name is not).
# NOTE(review): `api.search` is the tweepy 3.x name; newer tweepy releases call it
# `search_tweets` - confirm against the installed version.
for tweet in tweepy.Cursor(api.search,
                           q=q,
                           count=100,
                           lang=lang,
                           tweet_mode='extended').items(max_tweets):

    # For a retweet, take the full text from the embedded original tweet
    if hasattr(tweet, 'retweeted_status'):
        tweet_text = tweet.retweeted_status.full_text
    else:
        tweet_text = tweet.full_text

    # Collect each tweet's data and metadata
    # For information on additional fields check https://developer.twitter.com/en/docs/twitter-api/v1/data-dictionary/object-model/tweet
    # `reply_count` / `retweet_count` are treated as optional: fall back to 0 when the
    # attribute is missing from the returned object.
    tw = (tweet_text,
          tweet.user.screen_name,
          tweet.user.followers_count,
          tweet.source_url,
          tweet.created_at,
          getattr(tweet, 'reply_count', 0),
          getattr(tweet, 'retweet_count', 0),
          tweet.favorite_count)

    # Report progress once every 10 collected tweets
    if len(tws) % 10 == 0:
        print(str(len(tws)) + " Tweets extracted")

    # Stored as a tuple to facilitate the later transformation into a dataframe
    tws.append(tw)


In [182]:
# Turn the list of tweet tuples into a table: one row per tweet,
# with the column labels given in the same order as the tuple fields
tweetsDF = pd.DataFrame(
    data=tws,
    columns=[
        'text',
        'user screen name',
        'user followers',
        'url',
        'created at',
        'replies',
        'retweets',
        'likes',
    ],
)
display(tweetsDF)

Unnamed: 0,text,user screen name,user followers,url,created at,replies,retweets,likes
0,@JoeS619 @US_FDA @CDCgov @DrWoodcockFDA If U h...,noorchashm,6389,https://mobile.twitter.com,2021-05-23 14:30:42,0,0,0
1,@EmmaHil42019577 @JoeBiden @WhiteHouse The sci...,LeeCeinwen,190,http://twitter.com/download/iphone,2021-05-23 14:30:41,0,0,1
2,@MPoppodum @earthshakerph @BBCNews @POTUS With...,1basp1,305,http://twitter.com/download/android,2021-05-23 14:30:12,0,0,0
3,The @US_FDA says it's now OK to store the @pfi...,ghn_news,15591,https://www.heyorca.com,2021-05-23 14:30:04,0,0,0
4,@CDCgov Lol 😂😂😂\nTrust us the vaccine works bu...,ShaneGArchulet2,6,https://mobile.twitter.com,2021-05-23 14:29:14,0,0,0
...,...,...,...,...,...,...,...,...
891,That’s it. The Heil Hitler VAX PASS was the la...,x_rhodium,55,https://mobile.twitter.com,2021-05-22 04:36:11,0,0,0
892,@JoeBiden How 'bout you write a law to make va...,smallbizowner99,11,https://mobile.twitter.com,2021-05-22 04:35:24,0,0,0
893,@NAChristakis @WhiteHouse I’m a little worried...,TheAdamMack,39,http://twitter.com/#!/download/ipad,2021-05-22 04:30:55,0,0,0
894,@Jyn2Leia2Rey @dumb_ars_people @AndyRichter @J...,JimMaruschak,88,http://twitter.com/download/android,2021-05-22 04:24:11,0,0,0


### Save results

In [183]:
# Write the tweets table to an Excel workbook, leaving out the dataframe index column
tweetsDF.to_excel(excel_writer="Tweets.xlsx", index=False)