In [None]:
# import dependencies
import GetOldTweets3 as got
import pandas as pd
import datetime as dt

In [None]:
# Let pandas show up to 800 characters per cell so the long 'Text' column
# is not cut short when a DataFrame is displayed.
pd.options.display.max_colwidth = 800

In [None]:
# We used the GetOldTweets3 library and referred to the example code at: https://pypi.org/project/GetOldTweets3/

# create function to retrieve Twitter data using GetOldTweets3
def get_tweets(query, state, startDate, endDate, maxTweet, within="500mi"):
    """Run one GetOldTweets3 search and return the results as a DataFrame.

    Parameters
    ----------
    query : str
        Search text, passed to ``setQuerySearch``.
    state : str
        Location the search is centred on, passed to ``setNear``.
    startDate : str
        Start of the date range, passed to ``setSince``.
    endDate : str
        End of the date range, passed to ``setUntil``.
    maxTweet : int
        Maximum number of tweets to pull, passed to ``setMaxTweets``.
    within : str, optional
        Search radius around ``state``, passed to ``setWithin``. Defaults to
        "500mi", the value this function previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per tweet returned, with columns 'Text' (``tw.text``) and
        'Date' (``tw.date``). Has zero rows when the search returns nothing.
    """
    # The chain is wrapped in parentheses instead of using backslash
    # continuations: a backslash must be the last character on its line, so
    # "\ # comment" is a SyntaxError and the cell could not run.
    tweetCriteria = (
        got.manager.TweetCriteria()
        .setQuerySearch(query)    # search query
        .setSince(startDate)      # start date of query range
        .setUntil(endDate)        # end date of query range
        .setNear(state)           # geographical location of query
        .setWithin(within)        # radius of geographical location
        .setMaxTweets(maxTweet)   # maximum number of tweets pulled
    )
    tweets = got.manager.TweetManager.getTweets(tweetCriteria)

    # Keep only the text and timestamp of each tweet.
    rows = [[tw.text, tw.date] for tw in tweets]

    return pd.DataFrame(rows, columns=['Text', 'Date'])

In [None]:
# Day-of-month range to query; both ends are inclusive.
start_date = 1
end_date = 30

# Month(s) to query. Twitter enforced connection timeouts, so we ran this cell
# once per month (changing the value below) instead of looping over every
# month in a single run.
monthArray = [3] # 4, 5, 6 are the other months we used for this project
# range() excludes its stop value, so add 1 to really include end_date.
dateArray = range(start_date, end_date + 1)
maxTweet = 500
yearString = str(2020)

# Uncomment the term you want to search.
search_str = "Covid"
#search_str = "Covid-19"
#search_str = "Lockdown"
#search_str = "Lysol"
#search_str = "Pandemic"
# NOTE(review): the next term is likely meant to be "Quarantine" - confirm before re-running.
#search_str = "Quarentine"
#search_str = "Vaccine"

# One DataFrame per (month, day) query. They are concatenated once after the
# loop, which avoids re-copying the accumulated data on every iteration.
daily_frames = []

# Loop through each month and day above and pull one day of tweets per query.
for x in monthArray:
    # Work out how many days this month has so that days which do not exist
    # (e.g. the 30th of February) are skipped instead of producing a bad date.
    first_of_month = dt.date(int(yearString), x, 1)
    first_of_next_month = dt.date(first_of_month.year + x // 12, x % 12 + 1, 1)
    days_in_month = (first_of_next_month - first_of_month).days

    for y in dateArray:
        if y > days_in_month:
            continue

        # Build the query window with real date arithmetic: the end date rolls
        # over into the next month correctly and both values are zero-padded
        # ISO dates (YYYY-MM-DD).
        day = dt.date(int(yearString), x, y)
        startDate = day.isoformat()
        print(startDate)
        endDate = (day + dt.timedelta(days=1)).isoformat()
        print(endDate)
        print(search_str)

        df = get_tweets(search_str, 'USA', startDate, endDate, maxTweet)
        df['search_term'] = search_str
        # Skip empty results so they cannot affect the dtypes of the combined frame.
        if not df.empty:
            daily_frames.append(df)

# Combine everything that was pulled; fall back to an empty frame with the
# expected columns when no query returned any tweets.
if daily_frames:
    data_df = pd.concat(daily_frames, ignore_index=True)
else:
    data_df = pd.DataFrame(columns=['Text', 'Date', 'search_term'])

In [None]:
# Reformat the Date column as MM/DD/YYYY strings.
# pd.to_datetime makes this cell safe to re-run: after the first run the
# column holds strings, which have no .dt accessor, so the bare
# data_df['Date'].dt call would raise AttributeError the second time.
data_df['Date'] = pd.to_datetime(data_df['Date']).dt.strftime('%m/%d/%Y')

In [None]:
# Export to CSV.
# The file name is derived from the search term and month(s) configured above,
# so it always matches the data being written; the previous hard-coded name
# had to be edited by hand on every run and could mislabel the export.
# '%B' is the full month name (e.g. 'March') in the current locale.
month_names = '_'.join(
    dt.date(int(yearString), month, 1).strftime('%B') for month in monthArray
)
output_file = search_str + '_' + month_names + '_Data.csv'
data_df.to_csv(output_file, index = False)