In [1]:
# For sending GET requests from the API
import requests
# For saving access tokens and for file management when creating and adding to the dataset
import os
# For dealing with json responses we receive from the API
import json
# For displaying the data after
import pandas as pd
# For saving the response data in CSV format
import csv
# For parsing the dates received from twitter in readable formats
import datetime
import dateutil.parser
import unicodedata
#To add wait time between requests
import time

In [15]:
keyword= [
'(Vaccine OR Vaccines)(Covid-19 OR Covid OR Corona OR Coronavirus) -("johnson and johnson" OR "Johnson&Johnson" OR "Johnson & Johnson") -pfizer -moderna -sinopharm -booster -Adenovirus -Anthrax -Cholera -Diphtheria -"Hepatitis A"  -"Hepatitis B" -Hepatitis  -HPV -FLU -Measles -Meningococcal -Mumps -Pertussis -Pneumococcal -Polio -rabies -rotavirus -rubella -shingles -smallpox -tetanus -Tuberculosis -"Typhoid Fever" -Varicella -"Yellow Fever" lang:en -is:nullcast',
'Pfizer lang:en -is:nullcast',
'moderna lang:en -is:nullcast',
'sinopharm lang:en -is:nullcast',
'("johnson and johnson" OR "Johnson&Johnson" OR "Johnson & Johnson") lang:en -is:nullcast', 
'(antivax OR anti-vax OR "anti vax") lang:en -is:nullcast', 
'Booster (shoot OR shot OR shots) lang:en -is:nullcast']


keywords2 = [ "Vaccine", "Pfizer", "moderna", "sinopharm", "Johnson", "antivax", "Booster"]

start_list =    ['2020-12-01T00:00:00.000Z',
                 '2021-01-01T00:00:00.000Z',
                 '2021-02-01T00:00:00.000Z',
                 '2021-03-01T00:00:00.000Z',
                 '2021-04-01T00:00:00.000Z',
                 '2021-05-01T00:00:00.000Z',
                 '2021-06-01T00:00:00.000Z',
                 '2021-07-01T00:00:00.000Z',
                 '2021-08-01T00:00:00.000Z',
                 '2021-09-01T00:00:00.000Z',
                 '2021-10-01T00:00:00.000Z',
                 '2021-11-01T00:00:00.000Z',
                 '2021-12-01T00:00:00.000Z',
                 '2022-01-01T00:00:00.000Z',
                 '2022-02-01T00:00:00.000Z']
end_list =    [  '2021-01-01T00:00:00.000Z',
                 '2021-02-01T00:00:00.000Z',
                 '2021-03-01T00:00:00.000Z',
                 '2021-04-01T00:00:00.000Z',
                 '2021-05-01T00:00:00.000Z',
                 '2021-06-01T00:00:00.000Z',
                 '2021-07-01T00:00:00.000Z',
                 '2021-08-01T00:00:00.000Z',
                 '2021-09-01T00:00:00.000Z',
                 '2021-10-01T00:00:00.000Z',
                 '2021-11-01T00:00:00.000Z',
                 '2021-12-01T00:00:00.000Z',
                 '2022-01-01T00:00:00.000Z',
                 '2022-02-01T00:00:00.000Z',
                 '2022-02-18T00:00:00.000Z',]



In [3]:
os.environ['TOKEN'] = ''
def auth():
    return os.getenv('TOKEN')
def create_headers(bearer_token):
    headers = {"Authorization": "Bearer {}".format(bearer_token)}
    return headers
def create_url(keyword, start_date, end_date, max_results = 10):
    
    search_url = "https://api.twitter.com/2/tweets/search/all" #Change to the endpoint you want to collect data from

    #change params based on the endpoint you are using
    query_params = {'query': keyword,
                    'start_time': start_date,
                    'end_time': end_date,
                    'max_results': max_results,
                    'sort_order' : 'relevancy',
                    'expansions': 'author_id,in_reply_to_user_id,geo.place_id',
                    'tweet.fields': 'id,text,author_id,in_reply_to_user_id,geo,conversation_id,created_at,lang,public_metrics,referenced_tweets,reply_settings,source',
                    'user.fields': 'id,name,username,created_at,description,public_metrics,verified',
                    'place.fields': 'full_name,id,country,country_code,geo,name,place_type',
                    'next_token': {}}
    return (search_url, query_params)
def connect_to_endpoint(url, headers, params, next_token = None):
    params['next_token'] = next_token   #params object received from create_url function
    response = requests.request("GET", url, headers = headers, params = params)
    print("Endpoint Response Code: " + str(response.status_code))
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

In [4]:
def append_to_csv(json_response, fileName):
    #A counter variable
    counter = 0
    #Open OR create the target CSV file
    csvFile = open(fileName, "a", newline="", encoding='utf-8')
    csvWriter = csv.writer(csvFile)
    #Loop through each tweet
    for tweet in json_response['data']:
        # We will create a variable for each since some of the keys might not exist for some tweets
        # So we will account for that
        # 1. Author ID
        author_id = tweet['author_id']
        # 2. Time created
        created_at = dateutil.parser.parse(tweet['created_at'])
        # 3. Geolocation
        if ('geo' in tweet):   
            geo = tweet['geo']['place_id']
        else:
            geo = " "
        # 4. Tweet ID
        tweet_id = tweet['id']
        # 5. Language
        lang = tweet['lang']
        # 6. Tweet metrics
        retweet_count = tweet['public_metrics']['retweet_count']
        reply_count = tweet['public_metrics']['reply_count']
        like_count = tweet['public_metrics']['like_count']
        quote_count = tweet['public_metrics']['quote_count']
        # 7. source
        source = tweet['source']
        # 8. Tweet text
        text = tweet['text']
        # Assemble all data in a list
        res = [author_id, created_at, geo, tweet_id, lang, like_count, quote_count, reply_count, retweet_count, source, text]
        # Append the result to the CSV file
        csvWriter.writerow(res)
        counter += 1
    # When done, close the CSV file
    csvFile.close()
    # Print the number of tweets for this iteration
    print("# of Tweets added from this response: ", counter) 

In [5]:
%%time
bearer_token = auth()
headers = create_headers(bearer_token)
max_results = 500

CPU times: user 24 µs, sys: 1 µs, total: 25 µs
Wall time: 28.8 µs


In [11]:
%%time
arrayoftime=[]
datafile=keywords2[0]+".csv"
csvFile = open(datafile, "a", newline="", encoding='utf-8')
csvWriter = csv.writer(csvFile)
csvWriter.writerow(['author id', 'created_at', 'geo', 'id','lang', 'like_count', 'quote_count', 'reply_count','retweet_count','source','tweet'])
csvFile.close()
total_tweets = 0
print("Keyword:  ", keywords2[0])
req=0
for i in range(0,len(start_list)):
    # Inputs
    vpm=0
    print("Start Date: ", start_list[i])
    print("End Date: ", end_list[i])
    count = 0 # Counting tweets per time period
    max_count = 66667 # Max tweets per time period
    flag = True
    next_token = None
    # Check if flag is true
    while flag:
        # Check if max_count reached
        if count >= max_count:
            break
#         print("-------------------")
#         print("Token: ", next_token)
        url = create_url(keyword[0], start_list[i],end_list[i], max_results)
        json_response = connect_to_endpoint(url[0], headers, url[1], next_token)
        req=req+1
        print("Request:  ", req)
        result_count = json_response['meta']['result_count']
        vpm+=result_count

        if 'next_token' in json_response['meta']:
            # Save the token to use for next call
            next_token = json_response['meta']['next_token']
#             print("Next Token: ", next_token)
            if result_count is not None and result_count > 0 and next_token is not None:
#                 print("Start Date: ", start_list[i])
                append_to_csv(json_response, datafile)
                count += result_count
                total_tweets += result_count
#                 print("Total # of Tweets added: ", total_tweets)
#                 print("-------------------")
#                 time.sleep(5)                
        # If no next token exists
        else:
            if result_count is not None and result_count > 0:
#                 print("-------------------")
#                 print("Start Date: ", start_list[i])
                append_to_csv(json_response, datafile)
                count += result_count
                total_tweets += result_count
#                 print("Total # of Tweets added: ", total_tweets)
#                 print("-------------------")
#                 time.sleep(5)

            #Since this is the final request, turn flag to false to move to the next time period.
            flag = False
            next_token = None
        time.sleep(3)
    arrayoftime.append(vpm)
    time.sleep(3)
print("Total number of results: ", total_tweets)
for i in range(0,len(start_list)):
    print("Keyword:  ", keywords2[0])
    print("Start Date: ", start_list[i])
    print("End Date: ", end_list[i])
    print("Tweets Retreieved: ", arrayoftime[i])

Keyword:   Vaccine
Start Date:  2020-12-01T00:00:00.000Z
End Date:  2021-01-01T00:00:00.000Z
Endpoint Response Code: 200
Request:   1
# of Tweets added from this response:  487
Endpoint Response Code: 200
Request:   2
# of Tweets added from this response:  487
Endpoint Response Code: 200
Request:   3
# of Tweets added from this response:  489
Endpoint Response Code: 200
Request:   4
# of Tweets added from this response:  491
Endpoint Response Code: 200
Request:   5
# of Tweets added from this response:  492
Endpoint Response Code: 200
Request:   6
# of Tweets added from this response:  486
Endpoint Response Code: 200
Request:   7
# of Tweets added from this response:  490
Endpoint Response Code: 200
Request:   8
# of Tweets added from this response:  486
Endpoint Response Code: 200
Request:   9
# of Tweets added from this response:  485
Endpoint Response Code: 200
Request:   10
# of Tweets added from this response:  485
Endpoint Response Code: 200
Request:   11
# of Tweets added from t

Endpoint Response Code: 200
Request:   96
# of Tweets added from this response:  495
Endpoint Response Code: 200
Request:   97
# of Tweets added from this response:  496
Endpoint Response Code: 200
Request:   98
# of Tweets added from this response:  489
Endpoint Response Code: 200
Request:   99
# of Tweets added from this response:  491
Endpoint Response Code: 200
Request:   100
# of Tweets added from this response:  487
Endpoint Response Code: 200
Request:   101
# of Tweets added from this response:  495
Endpoint Response Code: 200
Request:   102
# of Tweets added from this response:  496
Endpoint Response Code: 200
Request:   103
# of Tweets added from this response:  493
Endpoint Response Code: 200
Request:   104
# of Tweets added from this response:  489
Endpoint Response Code: 200
Request:   105
# of Tweets added from this response:  493
Endpoint Response Code: 200
Request:   106
# of Tweets added from this response:  493
Endpoint Response Code: 200
Request:   107
# of Tweets add

Endpoint Response Code: 200
Request:   191
# of Tweets added from this response:  493
Endpoint Response Code: 200
Request:   192
# of Tweets added from this response:  493
Endpoint Response Code: 200
Request:   193
# of Tweets added from this response:  490
Endpoint Response Code: 200
Request:   194
# of Tweets added from this response:  492
Endpoint Response Code: 200
Request:   195
# of Tweets added from this response:  488
Endpoint Response Code: 200
Request:   196
# of Tweets added from this response:  492
Endpoint Response Code: 200
Request:   197
# of Tweets added from this response:  492
Endpoint Response Code: 200
Request:   198
# of Tweets added from this response:  493
Endpoint Response Code: 200
Request:   199
# of Tweets added from this response:  494
Endpoint Response Code: 200
Request:   200
# of Tweets added from this response:  496
Endpoint Response Code: 200
Request:   201
# of Tweets added from this response:  489
Endpoint Response Code: 200
Request:   202
# of Tweets

Endpoint Response Code: 200
Request:   286
# of Tweets added from this response:  496
Endpoint Response Code: 200
Request:   287
# of Tweets added from this response:  498
Endpoint Response Code: 200
Request:   288
# of Tweets added from this response:  490
Endpoint Response Code: 200
Request:   289
# of Tweets added from this response:  493
Endpoint Response Code: 200
Request:   290
# of Tweets added from this response:  488
Endpoint Response Code: 200
Request:   291
# of Tweets added from this response:  494
Endpoint Response Code: 200
Request:   292
# of Tweets added from this response:  493
Endpoint Response Code: 200
Request:   293
Start Date:  2021-06-01T00:00:00.000Z
End Date:  2021-07-01T00:00:00.000Z
Endpoint Response Code: 200
Request:   294
# of Tweets added from this response:  497
Endpoint Response Code: 200
Request:   295
# of Tweets added from this response:  497
Endpoint Response Code: 200
Request:   296
# of Tweets added from this response:  495
Endpoint Response Code:

Endpoint Response Code: 200
Request:   380
# of Tweets added from this response:  499
Endpoint Response Code: 200
Request:   381
# of Tweets added from this response:  499
Endpoint Response Code: 200
Request:   382
# of Tweets added from this response:  500
Endpoint Response Code: 200
Request:   383
# of Tweets added from this response:  500
Endpoint Response Code: 200
Request:   384
# of Tweets added from this response:  498
Endpoint Response Code: 200
Request:   385
# of Tweets added from this response:  500
Endpoint Response Code: 200
Request:   386
# of Tweets added from this response:  499
Endpoint Response Code: 200
Request:   387
# of Tweets added from this response:  500
Endpoint Response Code: 200
Request:   388
# of Tweets added from this response:  499
Endpoint Response Code: 200
Request:   389
# of Tweets added from this response:  499
Endpoint Response Code: 200
Request:   390
# of Tweets added from this response:  496
Endpoint Response Code: 200
Request:   391
# of Tweets

Endpoint Response Code: 200
Request:   475
# of Tweets added from this response:  500
Endpoint Response Code: 200
Request:   476
# of Tweets added from this response:  500
Endpoint Response Code: 200
Request:   477
# of Tweets added from this response:  500
Endpoint Response Code: 200
Request:   478
# of Tweets added from this response:  497
Endpoint Response Code: 200
Request:   479
# of Tweets added from this response:  499
Endpoint Response Code: 200
Request:   480
# of Tweets added from this response:  500
Endpoint Response Code: 200
Request:   481
# of Tweets added from this response:  499
Endpoint Response Code: 200
Request:   482
# of Tweets added from this response:  500
Endpoint Response Code: 200
Request:   483
# of Tweets added from this response:  498
Endpoint Response Code: 200
Request:   484
# of Tweets added from this response:  497
Endpoint Response Code: 200
Request:   485
# of Tweets added from this response:  498
Endpoint Response Code: 200
Request:   486
# of Tweets

Endpoint Response Code: 200
Request:   569
# of Tweets added from this response:  495
Endpoint Response Code: 200
Request:   570
# of Tweets added from this response:  498
Endpoint Response Code: 200
Request:   571
# of Tweets added from this response:  499
Endpoint Response Code: 200
Request:   572
# of Tweets added from this response:  499
Endpoint Response Code: 200
Request:   573
# of Tweets added from this response:  497
Start Date:  2021-12-01T00:00:00.000Z
End Date:  2022-01-01T00:00:00.000Z
Endpoint Response Code: 200
Request:   574
# of Tweets added from this response:  497
Endpoint Response Code: 200
Request:   575
# of Tweets added from this response:  496
Endpoint Response Code: 200
Request:   576
# of Tweets added from this response:  493
Endpoint Response Code: 200
Request:   577
# of Tweets added from this response:  491
Endpoint Response Code: 200
Request:   578
# of Tweets added from this response:  497
Endpoint Response Code: 200
Request:   579
# of Tweets added from 

Endpoint Response Code: 200
Request:   662
# of Tweets added from this response:  496
Endpoint Response Code: 200
Request:   663
# of Tweets added from this response:  499
Endpoint Response Code: 200
Request:   664
# of Tweets added from this response:  498
Endpoint Response Code: 200
Request:   665
# of Tweets added from this response:  493
Endpoint Response Code: 200
Request:   666
# of Tweets added from this response:  495
Endpoint Response Code: 200
Request:   667
# of Tweets added from this response:  495
Endpoint Response Code: 200
Request:   668
# of Tweets added from this response:  497
Total number of results:  327991
Keyword:   Vaccine
Start Date:  2020-12-01T00:00:00.000Z
End Date:  2021-01-01T00:00:00.000Z
Tweets Retreieved:  28310
Keyword:   Vaccine
Start Date:  2021-01-01T00:00:00.000Z
End Date:  2021-02-01T00:00:00.000Z
Tweets Retreieved:  27367
Keyword:   Vaccine
Start Date:  2021-02-01T00:00:00.000Z
End Date:  2021-03-01T00:00:00.000Z
Tweets Retreieved:  21059
Keyword:

In [6]:
%%time
datafile=keywords2[1]+".csv"
csvFile = open(datafile, "a", newline="", encoding='utf-8')
csvWriter = csv.writer(csvFile)
csvWriter.writerow(['author id', 'created_at', 'geo', 'id','lang', 'like_count', 'quote_count', 'reply_count','retweet_count','source','tweet'])
csvFile.close()
total_tweets = 0
print("Keyword:  ", keywords2[1])
req=0
for i in range(0,len(start_list)):
    # Inputs
    print("Start Date: ", start_list[i])
    print("End Date: ", end_list[i])
    count = 0 # Counting tweets per time period
    max_count = 66667 # Max tweets per time period
    flag = True
    next_token = None
    # Check if flag is true
    while flag:
        # Check if max_count reached
        if count >= max_count:
            break
#         print("-------------------")
#         print("Token: ", next_token)
        url = create_url(keyword[1], start_list[i],end_list[i], max_results)
        json_response = connect_to_endpoint(url[0], headers, url[1], next_token)
        req=req+1
        print("Request:  ", req)
        result_count = json_response['meta']['result_count']

        if 'next_token' in json_response['meta']:
            # Save the token to use for next call
            next_token = json_response['meta']['next_token']
            print("Next Token: ", next_token)
            if result_count is not None and result_count > 0 and next_token is not None:
#                 print("Start Date: ", start_list[i])
                append_to_csv(json_response, datafile)
                count += result_count
                total_tweets += result_count
#                 print("Total # of Tweets added: ", total_tweets)
#                 print("-------------------")
#                 time.sleep(5)                
        # If no next token exists
        else:
            if result_count is not None and result_count > 0:
#                 print("-------------------")
#                 print("Start Date: ", start_list[i])
                append_to_csv(json_response, datafile)
                count += result_count
                total_tweets += result_count
#                 print("Total # of Tweets added: ", total_tweets)
#                 print("-------------------")
#                 time.sleep(5)

            #Since this is the final request, turn flag to false to move to the next time period.
            flag = False
            next_token = None
        time.sleep(3)
    time.sleep(3)
print("Total number of results: ", total_tweets)

Keyword:   Pfizer
Start Date:  2020-12-01T00:00:00.000Z
End Date:  2021-01-01T00:00:00.000Z
Endpoint Response Code: 200
Request:   1
Next Token:  b26v89c19zqg8o3foshtdc4v5qss4vwf7i8xr1wr6jibh
# of Tweets added from this response:  487
Endpoint Response Code: 200
Request:   2
Next Token:  b26v89c19zqg8o3foshsy4dat3yhpctlyfj3u7m2f7iil
# of Tweets added from this response:  484
Endpoint Response Code: 200
Request:   3
Next Token:  b26v89c19zqg8o3foshsj0ul38m8skni64o8atbhdy0sd
# of Tweets added from this response:  485
Endpoint Response Code: 200
Request:   4
Next Token:  b26v89c19zqg8o3foshs3zjamkgvvktrb5vpnb64c1dvh
# of Tweets added from this response:  485
Endpoint Response Code: 200
Request:   5
Next Token:  b26v89c19zqg8o3foshrp2g908dn28r75hbx8c3bmsakd
# of Tweets added from this response:  493
Endpoint Response Code: 200
Request:   6
Next Token:  b26v89c19zqg8o3foshrors0n653suxc8c77oyrx0xjst
# of Tweets added from this response:  478
Endpoint Response Code: 200
Request:   7
Next Toke

Endpoint Response Code: 200
Request:   58
Start Date:  2021-03-01T00:00:00.000Z
End Date:  2021-04-01T00:00:00.000Z
Endpoint Response Code: 200
Request:   59
Next Token:  b26v89c19zqg8o3fosqtjqdf39q6zes2zsunw5m201ta5
# of Tweets added from this response:  489
Endpoint Response Code: 200
Request:   60
Next Token:  b26v89c19zqg8o3fosqtjdk5pg6sxg30scg207u5od119
# of Tweets added from this response:  487
Endpoint Response Code: 200
Request:   61
Next Token:  b26v89c19zqg8o3fosqt4c7o1i7knixrqrczoyg524ku5
# of Tweets added from this response:  482
Endpoint Response Code: 200
Request:   62
Next Token:  b26v89c19zqg8o3fosqsp8qr8b3gxmfbtjfz6kyiyjvy5
# of Tweets added from this response:  486
Endpoint Response Code: 200
Request:   63
Next Token:  b26v89c19zqg8o3fosqsa33x2joyzm6deo8p7o9fi10u5
# of Tweets added from this response:  482
Endpoint Response Code: 200
Request:   64
Next Token:  b26v89c19zqg8o3fosqrv3uy85oqm6dffnh4x5i60nvy5
# of Tweets added from this response:  485
Endpoint Response Co

Endpoint Response Code: 200
Request:   115
Next Token:  b26v89c19zqg8o3foswqk5sz0im5aumr5e6kdf1un80ot
# of Tweets added from this response:  489
Endpoint Response Code: 200
Request:   116
Next Token:  b26v89c19zqg8o3fostul35qywj4gi9nyigaiohctjzwd
# of Tweets added from this response:  486
Endpoint Response Code: 200
Request:   117
Next Token:  b26v89c19zqg8o3fostuksj0aukoklh6ty3rvn33ie3jx
# of Tweets added from this response:  490
Endpoint Response Code: 200
Request:   118
Next Token:  b26v89c19zqg8o3fostukjz7lcc6dfhxormt3j70z00ot
# of Tweets added from this response:  489
Endpoint Response Code: 200
Request:   119
Next Token:  b26v89c19zqg8o3fostu5krqnnsdhhi7iyl02jnzkuxz1
# of Tweets added from this response:  469
Endpoint Response Code: 200
Request:   120
# of Tweets added from this response:  488
Start Date:  2021-06-01T00:00:00.000Z
End Date:  2021-07-01T00:00:00.000Z
Endpoint Response Code: 200
Request:   121
Next Token:  b26v89c19zqg8o3fpdgappc28v85g9pugwxmr9ut4s3gd
# of Tweets a

Endpoint Response Code: 200
Request:   172
Next Token:  b26v89c19zqg8o3fpdm9ubrlzwggr0738fz6a6f22o6f1
# of Tweets added from this response:  498
Endpoint Response Code: 200
Request:   173
Next Token:  b26v89c19zqg8o3fpdm9u13ofbieli48lnv1wm2mxjb0d
# of Tweets added from this response:  493
Endpoint Response Code: 200
Request:   174
Next Token:  b26v89c19zqg8o3fpdm9f41jpycwwi7a52upoq8p5l4sd
# of Tweets added from this response:  495
Endpoint Response Code: 200
Request:   175
Next Token:  b26v89c19zqg8o3fpdm902njzcwz2b4eaosv7cufoly7x
# of Tweets added from this response:  497
Endpoint Response Code: 200
Request:   176
Next Token:  b26v89c19zqg8o3fpdm8zlmnakxlsr02w4toh38ujulx9
# of Tweets added from this response:  500
Endpoint Response Code: 200
Request:   177
Next Token:  b26v89c19zqg8o3fpdm8kqobwlepe7z0zcjbnqn9edn5p
# of Tweets added from this response:  499
Endpoint Response Code: 200
Request:   178
Next Token:  b26v89c19zqg8o3fpdm8ki6mq2dg4tdk225l9eab8ux6l
# of Tweets added from this 

Endpoint Response Code: 200
Request:   229
Next Token:  b26v89c19zqg8o3fpdy6ey8qnfm73lxhbur9o67vf2rul
# of Tweets added from this response:  492
Endpoint Response Code: 200
Request:   230
Next Token:  b26v89c19zqg8o3fpdy5zur839rygh2i9j21gy0u4xo59
# of Tweets added from this response:  497
Endpoint Response Code: 200
Request:   231
Next Token:  b26v89c19zqg8o3fpdy5zocg2ei42nb25ppkv5v0m4g3h
# of Tweets added from this response:  496
Endpoint Response Code: 200
Request:   232
Next Token:  b26v89c19zqg8o3fpdy5zhz67aacw118b6brl1jbbrtdp
# of Tweets added from this response:  495
Endpoint Response Code: 200
Request:   233
Next Token:  b26v89c19zqg8o3fpdy5kp5vx4sysv2s5qy9bd5rlqwe5
# of Tweets added from this response:  497
Endpoint Response Code: 200
Request:   234
Next Token:  b26v89c19zqg8o3fpdy5kgls64j0qrtis5ipkrk2gf9fh
# of Tweets added from this response:  496
Endpoint Response Code: 200
Request:   235
Next Token:  b26v89c19zqg8o3fpdv9l3btl0zyedqz3dmxz3pmez4hp
# of Tweets added from this 

Endpoint Response Code: 200
Request:   286
Next Token:  b26v89c19zqg8o3fpe4548pdntbj8pn6r0j8le5upeohp
# of Tweets added from this response:  495
Endpoint Response Code: 200
Request:   287
Next Token:  b26v89c19zqg8o3fpe194xk4rf15uyy2jb4pgku7gkrct
# of Tweets added from this response:  490
Endpoint Response Code: 200
Request:   288
Next Token:  b26v89c19zqg8o3fpe194p1u1isx8l0xqhgjorjs6j24d
# of Tweets added from this response:  495
Endpoint Response Code: 200
Request:   289
Next Token:  b26v89c19zqg8o3fpe18pu0tsus2g6u7iz5c50qw4j2bh
# of Tweets added from this response:  495
Endpoint Response Code: 200
Request:   290
Next Token:  b26v89c19zqg8o3fpe18pf5j8y2my2i3vid5a4dig6rnh
# of Tweets added from this response:  489
Endpoint Response Code: 200
Request:   291
Next Token:  b26v89c19zqg8o3fpe18ama5cs5euqkocjtnqhjgc2rgd
# of Tweets added from this response:  492
Endpoint Response Code: 200
Request:   292
Next Token:  b26v89c19zqg8o3fpe18a9khs7v6ibd1lvzn88ixgp4l9
# of Tweets added from this 

In [9]:
%%time
arrayoftime=[]
datafile=keywords2[2]+".csv"
csvFile = open(datafile, "a", newline="", encoding='utf-8')
csvWriter = csv.writer(csvFile)
csvWriter.writerow(['author id', 'created_at', 'geo', 'id','lang', 'like_count', 'quote_count', 'reply_count','retweet_count','source','tweet'])
csvFile.close()
total_tweets = 0
print("Keyword:  ", keywords2[2])
req=0
for i in range(0,len(start_list)):
    # Inputs
    vpm=0
    print("Start Date: ", start_list[i])
    print("End Date: ", end_list[i])
    count = 0 # Counting tweets per time period
    max_count = 66667 # Max tweets per time period
    flag = True
    next_token = None
    # Check if flag is true
    while flag:
        # Check if max_count reached
        if count >= max_count:
            break
#         print("-------------------")
#         print("Token: ", next_token)
        url = create_url(keyword[2], start_list[i],end_list[i], max_results)
        json_response = connect_to_endpoint(url[0], headers, url[1], next_token)
        req=req+1
        print("Request:  ", req)
        result_count = json_response['meta']['result_count']
        vpm+=result_count

        if 'next_token' in json_response['meta']:
            # Save the token to use for next call
            next_token = json_response['meta']['next_token']
#             print("Next Token: ", next_token)
            if result_count is not None and result_count > 0 and next_token is not None:
#                 print("Start Date: ", start_list[i])
                append_to_csv(json_response, datafile)
                count += result_count
                total_tweets += result_count
#                 print("Total # of Tweets added: ", total_tweets)
#                 print("-------------------")
#                 time.sleep(5)                
        # If no next token exists
        else:
            if result_count is not None and result_count > 0:
#                 print("-------------------")
#                 print("Start Date: ", start_list[i])
                append_to_csv(json_response, datafile)
                count += result_count
                total_tweets += result_count
#                 print("Total # of Tweets added: ", total_tweets)
#                 print("-------------------")
#                 time.sleep(5)

            #Since this is the final request, turn flag to false to move to the next time period.
            flag = False
            next_token = None
        time.sleep(3)
    arrayoftime.append(vpm)
    time.sleep(3)
print("Total number of results: ", total_tweets)
for i in range(0,len(start_list)):
    print("Keyword:  ", keywords2[2])
    print("Start Date: ", start_list[i])
    print("End Date: ", end_list[i])
    print("Tweets Retreieved: ", arrayoftime[i])

Keyword:   moderna
Start Date:  2020-12-01T00:00:00.000Z
End Date:  2021-01-01T00:00:00.000Z
Endpoint Response Code: 200
Request:   1
# of Tweets added from this response:  490
Endpoint Response Code: 200
Request:   2
# of Tweets added from this response:  478
Endpoint Response Code: 200
Request:   3
# of Tweets added from this response:  496
Endpoint Response Code: 200
Request:   4
# of Tweets added from this response:  494
Endpoint Response Code: 200
Request:   5
# of Tweets added from this response:  493
Endpoint Response Code: 200
Request:   6
# of Tweets added from this response:  494
Endpoint Response Code: 200
Request:   7
# of Tweets added from this response:  492
Endpoint Response Code: 200
Request:   8
# of Tweets added from this response:  495
Endpoint Response Code: 200
Request:   9
# of Tweets added from this response:  485
Endpoint Response Code: 200
Request:   10
# of Tweets added from this response:  474
Endpoint Response Code: 200
Request:   11
# of Tweets added from t

Endpoint Response Code: 200
Request:   91
# of Tweets added from this response:  499
Endpoint Response Code: 200
Request:   92
# of Tweets added from this response:  498
Endpoint Response Code: 200
Request:   93
# of Tweets added from this response:  478
Endpoint Response Code: 200
Request:   94
# of Tweets added from this response:  497
Endpoint Response Code: 200
Request:   95
# of Tweets added from this response:  495
Endpoint Response Code: 200
Request:   96
# of Tweets added from this response:  498
Endpoint Response Code: 200
Request:   97
# of Tweets added from this response:  493
Endpoint Response Code: 200
Request:   98
# of Tweets added from this response:  407
Start Date:  2021-11-01T00:00:00.000Z
End Date:  2021-12-01T00:00:00.000Z
Endpoint Response Code: 200
Request:   99
# of Tweets added from this response:  495
Endpoint Response Code: 200
Request:   100
# of Tweets added from this response:  491
Endpoint Response Code: 200
Request:   101
# of Tweets added from this resp

In [7]:
array=[]
array.append(3)
array.append(2)
print(array[0])

3


In [10]:
%%time
arrayoftime=[]
datafile=keywords2[3]+".csv"
csvFile = open(datafile, "a", newline="", encoding='utf-8')
csvWriter = csv.writer(csvFile)
csvWriter.writerow(['author id', 'created_at', 'geo', 'id','lang', 'like_count', 'quote_count', 'reply_count','retweet_count','source','tweet'])
csvFile.close()
total_tweets = 0
print("Keyword:  ", keywords2[3])
req=0
for i in range(0,len(start_list)):
    # Inputs
    vpm=0
    print("Start Date: ", start_list[i])
    print("End Date: ", end_list[i])
    count = 0 # Counting tweets per time period
    max_count = 66667 # Max tweets per time period
    flag = True
    next_token = None
    # Check if flag is true
    while flag:
        # Check if max_count reached
        if count >= max_count:
            break
#         print("-------------------")
#         print("Token: ", next_token)
        url = create_url(keyword[3], start_list[i],end_list[i], max_results)
        json_response = connect_to_endpoint(url[0], headers, url[1], next_token)
        req=req+1
        print("Request:  ", req)
        result_count = json_response['meta']['result_count']
        vpm+=result_count

        if 'next_token' in json_response['meta']:
            # Save the token to use for next call
            next_token = json_response['meta']['next_token']
#             print("Next Token: ", next_token)
            if result_count is not None and result_count > 0 and next_token is not None:
#                 print("Start Date: ", start_list[i])
                append_to_csv(json_response, datafile)
                count += result_count
                total_tweets += result_count
#                 print("Total # of Tweets added: ", total_tweets)
#                 print("-------------------")
#                 time.sleep(5)                
        # If no next token exists
        else:
            if result_count is not None and result_count > 0:
#                 print("-------------------")
#                 print("Start Date: ", start_list[i])
                append_to_csv(json_response, datafile)
                count += result_count
                total_tweets += result_count
#                 print("Total # of Tweets added: ", total_tweets)
#                 print("-------------------")
#                 time.sleep(5)

            #Since this is the final request, turn flag to false to move to the next time period.
            flag = False
            next_token = None
        time.sleep(3)
    arrayoftime.append(vpm)
    time.sleep(3)
print("Total number of results: ", total_tweets)
for i in range(0,len(start_list)):
    print("Keyword:  ", keywords2[3])
    print("Start Date: ", start_list[i])
    print("End Date: ", end_list[i])
    print("Tweets Retreieved: ", arrayoftime[i])

Keyword:   sinopharm
Start Date:  2020-12-01T00:00:00.000Z
End Date:  2021-01-01T00:00:00.000Z
Endpoint Response Code: 200
Request:   1
# of Tweets added from this response:  492
Start Date:  2021-01-01T00:00:00.000Z
End Date:  2021-02-01T00:00:00.000Z
Endpoint Response Code: 200
Request:   2
# of Tweets added from this response:  490
Endpoint Response Code: 200
Request:   3
# of Tweets added from this response:  483
Start Date:  2021-02-01T00:00:00.000Z
End Date:  2021-03-01T00:00:00.000Z
Endpoint Response Code: 200
Request:   4
# of Tweets added from this response:  496
Endpoint Response Code: 200
Request:   5
# of Tweets added from this response:  486
Start Date:  2021-03-01T00:00:00.000Z
End Date:  2021-04-01T00:00:00.000Z
Endpoint Response Code: 200
Request:   6
# of Tweets added from this response:  493
Endpoint Response Code: 200
Request:   7
# of Tweets added from this response:  493
Start Date:  2021-04-01T00:00:00.000Z
End Date:  2021-05-01T00:00:00.000Z
Endpoint Response Cod

In [13]:
%%time
arrayoftime=[]
datafile=keywords2[4]+".csv"
csvFile = open(datafile, "a", newline="", encoding='utf-8')
csvWriter = csv.writer(csvFile)
csvWriter.writerow(['author id', 'created_at', 'geo', 'id','lang', 'like_count', 'quote_count', 'reply_count','retweet_count','source','tweet'])
csvFile.close()
total_tweets = 0
print("Keyword:  ", keywords2[4])
req=0
for i in range(0,len(start_list)):
    # Inputs
    vpm=0
    print("Start Date: ", start_list[i])
    print("End Date: ", end_list[i])
    count = 0 # Counting tweets per time period
    max_count = 66667 # Max tweets per time period
    flag = True
    next_token = None
    # Check if flag is true
    while flag:
        # Check if max_count reached
        if count >= max_count:
            break
#         print("-------------------")
#         print("Token: ", next_token)
        url = create_url(keyword[4], start_list[i],end_list[i], max_results)
        json_response = connect_to_endpoint(url[0], headers, url[1], next_token)
        req=req+1
        print("Request:  ", req)
        result_count = json_response['meta']['result_count']
        vpm+=result_count

        if 'next_token' in json_response['meta']:
            # Save the token to use for next call
            next_token = json_response['meta']['next_token']
#             print("Next Token: ", next_token)
            if result_count is not None and result_count > 0 and next_token is not None:
#                 print("Start Date: ", start_list[i])
                append_to_csv(json_response, datafile)
                count += result_count
                total_tweets += result_count
#                 print("Total # of Tweets added: ", total_tweets)
#                 print("-------------------")
#                 time.sleep(5)                
        # If no next token exists
        else:
            if result_count is not None and result_count > 0:
#                 print("-------------------")
#                 print("Start Date: ", start_list[i])
                append_to_csv(json_response, datafile)
                count += result_count
                total_tweets += result_count
#                 print("Total # of Tweets added: ", total_tweets)
#                 print("-------------------")
#                 time.sleep(5)

            #Since this is the final request, turn flag to false to move to the next time period.
            flag = False
            next_token = None
        time.sleep(3)
    arrayoftime.append(vpm)
    time.sleep(3)
print("Total number of results: ", total_tweets)
for i in range(0,len(start_list)):
    print("Keyword:  ", keywords2[4])
    print("Start Date: ", start_list[i])
    print("End Date: ", end_list[i])
    print("Tweets Retreieved: ", arrayoftime[i])

Keyword:   Johnson
Start Date:  2020-12-01T00:00:00.000Z
End Date:  2021-01-01T00:00:00.000Z
Endpoint Response Code: 200
Request:   1
# of Tweets added from this response:  488
Endpoint Response Code: 200
Request:   2
Start Date:  2021-01-01T00:00:00.000Z
End Date:  2021-02-01T00:00:00.000Z
Endpoint Response Code: 200
Request:   3
# of Tweets added from this response:  490
Endpoint Response Code: 200
Request:   4
# of Tweets added from this response:  494
Endpoint Response Code: 200
Request:   5
# of Tweets added from this response:  490
Endpoint Response Code: 200
Request:   6
Start Date:  2021-02-01T00:00:00.000Z
End Date:  2021-03-01T00:00:00.000Z
Endpoint Response Code: 200
Request:   7
# of Tweets added from this response:  491
Endpoint Response Code: 200
Request:   8
# of Tweets added from this response:  490
Endpoint Response Code: 200
Request:   9
# of Tweets added from this response:  495
Endpoint Response Code: 200
Request:   10
# of Tweets added from this response:  492
Star

In [14]:
%%time
arrayoftime=[]
datafile=keywords2[5]+".csv"
csvFile = open(datafile, "a", newline="", encoding='utf-8')
csvWriter = csv.writer(csvFile)
csvWriter.writerow(['author id', 'created_at', 'geo', 'id','lang', 'like_count', 'quote_count', 'reply_count','retweet_count','source','tweet'])
csvFile.close()
total_tweets = 0
print("Keyword:  ", keywords2[5])
req=0
for i in range(0,len(start_list)):
    # Inputs
    vpm=0
    print("Start Date: ", start_list[i])
    print("End Date: ", end_list[i])
    count = 0 # Counting tweets per time period
    max_count = 66667 # Max tweets per time period
    flag = True
    next_token = None
    # Check if flag is true
    while flag:
        # Check if max_count reached
        if count >= max_count:
            break
#         print("-------------------")
#         print("Token: ", next_token)
        url = create_url(keyword[5], start_list[i],end_list[i], max_results)
        json_response = connect_to_endpoint(url[0], headers, url[1], next_token)
        req=req+1
        print("Request:  ", req)
        result_count = json_response['meta']['result_count']
        vpm+=result_count

        if 'next_token' in json_response['meta']:
            # Save the token to use for next call
            next_token = json_response['meta']['next_token']
#             print("Next Token: ", next_token)
            if result_count is not None and result_count > 0 and next_token is not None:
#                 print("Start Date: ", start_list[i])
                append_to_csv(json_response, datafile)
                count += result_count
                total_tweets += result_count
#                 print("Total # of Tweets added: ", total_tweets)
#                 print("-------------------")
#                 time.sleep(5)                
        # If no next token exists
        else:
            if result_count is not None and result_count > 0:
#                 print("-------------------")
#                 print("Start Date: ", start_list[i])
                append_to_csv(json_response, datafile)
                count += result_count
                total_tweets += result_count
#                 print("Total # of Tweets added: ", total_tweets)
#                 print("-------------------")
#                 time.sleep(5)

            #Since this is the final request, turn flag to false to move to the next time period.
            flag = False
            next_token = None
        time.sleep(3)
    arrayoftime.append(vpm)
    time.sleep(3)
print("Total number of results: ", total_tweets)
for i in range(0,len(start_list)):
    print("Keyword:  ", keywords2[5])
    print("Start Date: ", start_list[i])
    print("End Date: ", end_list[i])
    print("Tweets Retreieved: ", arrayoftime[i])

Keyword:   antivax
Start Date:  2020-12-01T00:00:00.000Z
End Date:  2021-01-01T00:00:00.000Z
Endpoint Response Code: 200
Request:   1
# of Tweets added from this response:  465
Endpoint Response Code: 200
Request:   2
# of Tweets added from this response:  454
Start Date:  2021-01-01T00:00:00.000Z
End Date:  2021-02-01T00:00:00.000Z
Endpoint Response Code: 200
Request:   3
# of Tweets added from this response:  469
Endpoint Response Code: 200
Request:   4
# of Tweets added from this response:  468
Start Date:  2021-02-01T00:00:00.000Z
End Date:  2021-03-01T00:00:00.000Z
Endpoint Response Code: 200
Request:   5
# of Tweets added from this response:  469
Endpoint Response Code: 200
Request:   6
# of Tweets added from this response:  474
Start Date:  2021-03-01T00:00:00.000Z
End Date:  2021-04-01T00:00:00.000Z
Endpoint Response Code: 200
Request:   7
# of Tweets added from this response:  479
Endpoint Response Code: 200
Request:   8
# of Tweets added from this response:  468
Start Date:  

In [16]:
%%time
arrayoftime=[]
datafile=keywords2[6]+".csv"
csvFile = open(datafile, "a", newline="", encoding='utf-8')
csvWriter = csv.writer(csvFile)
csvWriter.writerow(['author id', 'created_at', 'geo', 'id','lang', 'like_count', 'quote_count', 'reply_count','retweet_count','source','tweet'])
csvFile.close()
total_tweets = 0
print("Keyword:  ", keywords2[6])
req=0
for i in range(10,len(start_list)):
    # Inputs
    vpm=0
    print("Start Date: ", start_list[i])
    print("End Date: ", end_list[i])
    count = 0 # Counting tweets per time period
    max_count = 66667 # Max tweets per time period
    flag = True
    next_token = None
    # Check if flag is true
    while flag:
        # Check if max_count reached
        if count >= max_count:
            break
#         print("-------------------")
#         print("Token: ", next_token)
        url = create_url(keyword[6], start_list[i],end_list[i], max_results)
        json_response = connect_to_endpoint(url[0], headers, url[1], next_token)
        req=req+1
        print("Request:  ", req)
        result_count = json_response['meta']['result_count']
        vpm+=result_count

        if 'next_token' in json_response['meta']:
            # Save the token to use for next call
            next_token = json_response['meta']['next_token']
#             print("Next Token: ", next_token)
            if result_count is not None and result_count > 0 and next_token is not None:
#                 print("Start Date: ", start_list[i])
                append_to_csv(json_response, datafile)
                count += result_count
                total_tweets += result_count
#                 print("Total # of Tweets added: ", total_tweets)
#                 print("-------------------")
#                 time.sleep(5)                
        # If no next token exists
        else:
            if result_count is not None and result_count > 0:
#                 print("-------------------")
#                 print("Start Date: ", start_list[i])
                append_to_csv(json_response, datafile)
                count += result_count
                total_tweets += result_count
#                 print("Total # of Tweets added: ", total_tweets)
#                 print("-------------------")
#                 time.sleep(5)

            #Since this is the final request, turn flag to false to move to the next time period.
            flag = False
            next_token = None
        time.sleep(3)
    arrayoftime.append(vpm)
    time.sleep(3)
print("Total number of results: ", total_tweets)
for i in range(0,len(start_list)):
    print("Keyword:  ", keywords2[6])
    print("Start Date: ", start_list[i])
    print("End Date: ", end_list[i])
    print("Tweets Retreieved: ", arrayoftime[i])

Keyword:   Booster
Start Date:  2021-10-01T00:00:00.000Z
End Date:  2021-11-01T00:00:00.000Z
Endpoint Response Code: 200
Request:   1
# of Tweets added from this response:  498
Endpoint Response Code: 200
Request:   2
# of Tweets added from this response:  498
Endpoint Response Code: 200
Request:   3
# of Tweets added from this response:  497
Endpoint Response Code: 200
Request:   4
# of Tweets added from this response:  498
Endpoint Response Code: 200
Request:   5
# of Tweets added from this response:  498
Endpoint Response Code: 200
Request:   6
# of Tweets added from this response:  499
Start Date:  2021-11-01T00:00:00.000Z
End Date:  2021-12-01T00:00:00.000Z
Endpoint Response Code: 200
Request:   7
# of Tweets added from this response:  497
Endpoint Response Code: 200
Request:   8
# of Tweets added from this response:  498
Endpoint Response Code: 200
Request:   9
# of Tweets added from this response:  495
Endpoint Response Code: 200
Request:   10
# of Tweets added from this respons

IndexError: list index out of range

In [19]:
n=0
for i in range(10,len(start_list)):
    print("Keyword:  ", keywords2[6])
    print("Start Date: ", start_list[i])
    print("End Date: ", end_list[i])
    print("Tweets Retreieved: ", arrayoftime[n])
    n+=1

Keyword:   Booster
Start Date:  2021-10-01T00:00:00.000Z
End Date:  2021-11-01T00:00:00.000Z
Tweets Retreieved:  2988
Keyword:   Booster
Start Date:  2021-11-01T00:00:00.000Z
End Date:  2021-12-01T00:00:00.000Z
Tweets Retreieved:  3970
Keyword:   Booster
Start Date:  2021-12-01T00:00:00.000Z
End Date:  2022-01-01T00:00:00.000Z
Tweets Retreieved:  6444
Keyword:   Booster
Start Date:  2022-01-01T00:00:00.000Z
End Date:  2022-02-01T00:00:00.000Z
Tweets Retreieved:  4435
Keyword:   Booster
Start Date:  2022-02-01T00:00:00.000Z
End Date:  2022-02-18T00:00:00.000Z
Tweets Retreieved:  1493
