# Scraper for Twitter using GetOldTweets3

Package: https://github.com/Mottl/GetOldTweets3

### Notebook Author: Martin Beck

In [1]:
# Pip install GetOldTweets3 if you don't already have the package
# !pip install GetOldTweets3

# Imports
import GetOldTweets3 as got
import pandas as pd

## Query by Username
Creation of queries using GetOldTweets3

In [0]:
# Function that pulls tweets from a specific username and turns them into a csv file

# Parameters: (list of twitter usernames), (max number of most recent tweets to pull from)
def username_tweets_to_csv(usernamelist, max_tweets):
  """Scrape tweets for each username and write one CSV file per username.

  Parameters:
    usernamelist: iterable of Twitter usernames; each one is queried separately.
    max_tweets: cap on the number of tweets requested per username (handed to
      setMaxTweets). It also appears in the file name as int(max_tweets/1000).

  Output: for every username, writes '<username>-<N>k-tweets.csv' (relative
  path) with the columns 'date', 'tweets' and 'geo'. Nothing is returned.
  """
  for handle in usernamelist:
    # Search criteria: a single account, limited to max_tweets results
    criteria = got.manager.TweetCriteria()
    criteria = criteria.setUsername(handle)
    criteria = criteria.setMaxTweets(max_tweets)

    # Run the query and collect the tweet objects
    fetched = got.manager.TweetManager.getTweets(criteria)

    # One [date, text, geo] row per tweet, ready to load into a dataframe
    rows = []
    for item in fetched:
      rows.append([item.date, item.text, item.geo])

    frame = pd.DataFrame(rows, columns=['date', 'tweets', 'geo'])

    # File name carries the username and the requested cap in thousands
    out_name = '{}-{}k-tweets.csv'.format(handle, int(max_tweets/1000))
    frame.to_csv(out_name, sep=',')

## Query by Text Search

In [0]:
# Function that pulls tweets based on a general search query and turns them into a csv file

# Parameters: (text query you want to search), (max number of most recent tweets to pull from)
def text_query_to_csv(text_query, max_tweets):
  """Search tweets matching a text query and write them to a CSV file.

  Parameters:
    text_query: search string handed to setQuerySearch; it is also used
      verbatim as the prefix of the output file name.
    max_tweets: cap on the number of tweets requested (handed to
      setMaxTweets). It also appears in the file name as int(max_tweets/1000).

  Output: writes '<text_query>-<N>k-tweets.csv' (relative path) with the
  columns 'Datetime', 'Text' and 'geo'. Nothing is returned.
  """
  # Creation of query object; the search text has to be passed in here,
  # otherwise the criteria never receives the query
  tweet_criteria = got.manager.TweetCriteria().setQuerySearch(text_query)\
                                              .setMaxTweets(max_tweets)
  # Grabs tweets from the query object
  tweets = got.manager.TweetManager.getTweets(tweet_criteria)

  # Pulls tweet data from all tweets
  tweets_information = [[tweet.date, tweet.text, tweet.geo] for tweet in tweets]

  # Creation of dataframe from tweets
  tweets_df = pd.DataFrame(tweets_information, columns = ['Datetime', 'Text', 'geo'])

  # Converting tweets dataframe to csv file; uses the frame built above rather
  # than a name that may or may not exist in the notebook's global scope
  tweets_df.to_csv('{}-{}k-tweets.csv'.format(text_query, int(max_tweets/1000)), sep=',')

## Query Function Calls
Putting it all together and using functions created.

In [0]:
# Input username(s) to scrape tweets from; each username also names its csv file
# ('example1'/'example2' look like placeholders - replace with real accounts)
# max_recent_tweets caps how many tweets are pulled for each username
usernames = ['example1','example2']
max_recent_tweets = 10000

In [0]:
# Calling function to write each username's tweets (at most max_recent_tweets
# per username) to its own CSV file
username_tweets_to_csv(usernames,max_recent_tweets)

In [0]:
# Input search query to scrape tweets and name csv file
# max_recent_tweets caps how many tweets are pulled for that query
# NOTE: this reassigns max_recent_tweets (10000 in the username cell above), so
# re-running the username call after this cell would use 5000 instead
text_query = 'USA Election 2020'
max_recent_tweets = 5000

In [0]:
# Calling function to query at most max_recent_tweets tweets for text_query
# and create a CSV file
text_query_to_csv(text_query, max_recent_tweets)