In [1]:
import requests
import os
import json
import pandas as pd
import csv
import datetime
import dateutil.parser
import unicodedata
import time

In [2]:
# Store the Twitter API bearer token in the TOKEN environment variable so that
# auth() can read it back with os.getenv('TOKEN').
# The value below is a masked placeholder.
# NOTE(review): a real token should not be saved in the notebook itself —
# confirm how the credential is meant to be supplied before sharing this file.
os.environ['TOKEN'] = '********'

In [3]:
def auth():
    """Return the bearer token held in the ``TOKEN`` environment variable.

    Returns:
        The token string, or ``None`` when ``TOKEN`` is not set.
    """
    token = os.environ.get('TOKEN')
    return token

In [4]:
def create_headers(bearer_token):
    """Build the HTTP headers for a bearer-token authenticated request.

    Args:
        bearer_token: Token to place after the ``Bearer`` scheme.

    Returns:
        A dict with a single ``Authorization`` entry.
    """
    return {"Authorization": f"Bearer {bearer_token}"}

In [5]:
def create_url(keyword, start_date, end_date, max_results = 10):
    """Assemble the search endpoint URL and its query parameters.

    Args:
        keyword: Search query string.
        start_date: Value sent as ``start_time``.
        end_date: Value sent as ``end_time``.
        max_results: Value sent as ``max_results`` (defaults to 10).

    Returns:
        A ``(search_url, query_params)`` tuple. ``query_params['next_token']``
        is an empty-dict placeholder; ``connect_to_endpoint`` assigns the real
        pagination token before sending the request.
    """
    tweet_fields = ','.join((
        'id', 'text', 'author_id', 'in_reply_to_user_id', 'geo',
        'conversation_id', 'created_at', 'lang', 'public_metrics',
        'referenced_tweets', 'reply_settings', 'source',
    ))
    user_fields = ','.join((
        'id', 'name', 'username', 'created_at', 'description',
        'public_metrics', 'verified',
    ))
    place_fields = ','.join((
        'full_name', 'id', 'country', 'country_code', 'geo', 'name',
        'place_type',
    ))

    # Insertion order is kept the same as the request has always used.
    query_params = {}
    query_params['query'] = keyword
    query_params['start_time'] = start_date
    query_params['end_time'] = end_date
    query_params['max_results'] = max_results
    query_params['expansions'] = 'author_id,in_reply_to_user_id,geo.place_id'
    query_params['tweet.fields'] = tweet_fields
    query_params['user.fields'] = user_fields
    query_params['place.fields'] = place_fields
    query_params['next_token'] = {}

    endpoint = "https://api.twitter.com/2/tweets/search/all"
    return (endpoint, query_params)

In [6]:
def connect_to_endpoint(url, headers, params, next_token = None):
    """Send a GET request to ``url`` and return the decoded JSON body.

    Args:
        url: Endpoint URL (first element of the tuple from ``create_url``).
        headers: HTTP headers, e.g. the dict built by ``create_headers``.
        params: Query parameters (second element of the tuple from
            ``create_url``). The mapping is copied; the caller's object is
            left untouched.
        next_token: Pagination token taken from a previous response, or
            ``None`` to request the first page.

    Returns:
        The parsed JSON response.

    Raises:
        Exception: with ``(status_code, response_text)`` as its args when the
            response status is not 200.
    """
    # Work on a copy so repeated calls never leak a stale token into the
    # caller's dict.
    request_params = dict(params)
    # requests leaves out parameters whose value is None, so the first page is
    # requested without a next_token parameter.
    request_params['next_token'] = next_token
    # Without a timeout a stalled connection would block the collection loop
    # forever.
    response = requests.get(url, headers=headers, params=request_params, timeout=60)
    print("Endpoint Response Code: " + str(response.status_code))
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

In [8]:
def _tweet_to_row(tweet):
    """Flatten one tweet dict into the list of values written as a CSV row."""
    metrics = tweet['public_metrics']
    # Optional fields fall back to a single space, the filler this file has
    # always used for missing values.
    geo = (tweet.get('geo') or {}).get('place_id', " ")
    source = tweet.get('source', " ")
    # NOTE(review): tweet objects from the v2 search endpoint do not appear to
    # carry a 'user' key (user objects presumably arrive under
    # json_response['includes']['users']), so this is likely always blank —
    # confirm against the API reference.
    location = (tweet.get('user') or {}).get('location', " ")
    return [
        tweet['author_id'],
        dateutil.parser.parse(tweet['created_at']),
        geo,
        tweet['id'],
        tweet['lang'],
        metrics['like_count'],
        metrics['quote_count'],
        metrics['reply_count'],
        metrics['retweet_count'],
        source,
        tweet['text'],
        location,
    ]


def append_to_csv(json_response, fileName):
    """Append one CSV row per tweet in ``json_response['data']`` to ``fileName``.

    Column order: author id, created_at, geo place id, tweet id, lang,
    like_count, quote_count, reply_count, retweet_count, source, text,
    location.

    Args:
        json_response: Decoded search response. A missing or empty ``data``
            entry is treated as "no tweets" and nothing is written.
        fileName: Path of the CSV file to append to (created if absent).

    Raises:
        KeyError: if a tweet lacks one of the required fields (``author_id``,
            ``created_at``, ``id``, ``lang``, ``public_metrics``, ``text``).
    """
    tweets = json_response.get('data') or []
    counter = 0
    if tweets:
        # Open the file once per response instead of once per tweet, and pin
        # UTF-8 so non-ASCII tweet text does not depend on the platform's
        # default encoding.
        with open(fileName, 'a', newline='', encoding='utf-8') as fd:
            writer_object = csv.writer(fd)
            for tweet in tweets:
                writer_object.writerow(_tweet_to_row(tweet))
                counter += 1
    print("# of Tweets added from this response: ", counter)

In [None]:
# Collection run: page through the full-archive search one calendar month at a
# time and append every returned tweet to OUTPUT_FILE.

OUTPUT_FILE = "newest2_curbside_data.csv"
# One name per value in the rows written by append_to_csv (12 columns).
CSV_HEADER = ['author id', 'created_at', 'geo', 'id', 'lang', 'like_count', 'quote_count',
              'reply_count', 'retweet_count', 'source', 'tweet', 'location']
MAX_TWEETS_PER_WINDOW = 500000   # safety cap on tweets collected per month
REQUEST_PAUSE_SECONDS = 5        # pause after every request to stay under the rate limit

bearer_token = auth()
if not bearer_token:
    # Fail early instead of sending "Bearer None" and getting a 401 back.
    raise RuntimeError("TOKEN environment variable is not set")
headers = create_headers(bearer_token)

# The outer parentheses matter: in the search query language a space (AND)
# binds tighter than OR, so without them the lang/place filters would apply
# only to the last alternative.
keyword = '((curbside grocer) OR (grocery curbside) OR (groceries curbside)) lang:en place_country:US'

# Month boundaries from 2020-01-01 through 2022-12-01. Each window runs from
# one boundary to the next; end_time is exclusive, so consecutive windows
# neither overlap nor skip the last day of a month (including 2020-02-29).
month_starts = ['{:04d}-{:02d}-01T00:00:00.000Z'.format(2020 + offset // 12, offset % 12 + 1)
                for offset in range(36)]
start_list = month_starts[:-1]
end_list = month_starts[1:]

max_results = 500
total_tweets = 0

# Write the header to the same file the rows go to, and only when that file is
# new or empty, so re-running the cell does not insert header rows mid-file.
if not os.path.exists(OUTPUT_FILE) or os.path.getsize(OUTPUT_FILE) == 0:
    with open(OUTPUT_FILE, "a", newline="", encoding='utf-8') as csvFile:
        csv.writer(csvFile).writerow(CSV_HEADER)

for window_start, window_end in zip(start_list, end_list):
    count = 0
    next_token = None
    while count < MAX_TWEETS_PER_WINDOW:
        print("-------------------")
        print("Token: ", next_token)
        search_url, query_params = create_url(keyword, window_start, window_end, max_results)
        json_response = connect_to_endpoint(search_url, headers, query_params, next_token)
        meta = json_response['meta']
        result_count = meta.get('result_count') or 0
        # A missing next_token means this was the last page of the window.
        next_token = meta.get('next_token')
        if next_token is not None:
            print("Next Token: ", next_token)

        if result_count > 0:
            print("Start Date: ", window_start)
            append_to_csv(json_response, OUTPUT_FILE)
            count += result_count
            total_tweets += result_count
            print("Total # of Tweets added: ", total_tweets)
            print("-------------------")

        time.sleep(REQUEST_PAUSE_SECONDS)
        if next_token is None:
            break
print("Total number of results: ", total_tweets)