# Import libraries

In [1]:
# For sending GET requests to the API
import requests
# For dealing with json responses we receive from the API
import json
# For displaying the data after
import pandas as pd
# For saving the response data in CSV format
import csv
# For parsing the dates received from Twitter into readable formats
import datetime
import dateutil.parser
import unicodedata
#To add wait time between requests
import time
import creds # Credentials from Twitter are saved in this file as creds.py
import os

# Supporting functions

In [2]:
# Token to access Twitter API

def create_headers(bearer_token):
    """Build the HTTP headers that authenticate a Twitter API request.

    Args:
        bearer_token: The bearer token string to send with each request.

    Returns:
        A dict holding a single ``Authorization`` header of the form
        ``"Bearer <token>"``.
    """
    # Use the token that was passed in instead of reaching for the
    # module-level credentials, so the argument is actually honoured.
    return {"Authorization": f"Bearer {bearer_token}"}

# The query to download the data from Twitter
def create_url(keyword, start_date, end_date, max_results = 15):
    """Assemble the search endpoint and its query parameters.

    Args:
        keyword: The search query string.
        start_date: Value sent as the ``start_time`` parameter.
        end_date: Value sent as the ``end_time`` parameter.
        max_results: Value sent as the ``max_results`` parameter.

    Returns:
        A ``(search_url, query_params)`` tuple. ``query_params`` contains a
        ``next_token`` placeholder (an empty dict) that is expected to be
        overwritten before the request is sent.
    """
    # Change to the endpoint you want to collect data from.
    endpoint = "https://api.twitter.com/2/tweets/search/all"

    # Requested expansions and per-object fields, listed one name at a time
    # and joined into the comma-separated form the query string needs.
    expansions = ("author_id", "in_reply_to_user_id", "geo.place_id")
    tweet_fields = (
        "id", "text", "author_id", "in_reply_to_user_id", "geo",
        "conversation_id", "created_at", "lang", "public_metrics",
        "referenced_tweets", "reply_settings", "source",
    )
    user_fields = (
        "id", "name", "username", "created_at", "description",
        "public_metrics", "verified",
    )
    place_fields = (
        "full_name", "id", "country", "country_code", "geo", "name",
        "place_type",
    )

    params = {
        'query': keyword,
        'start_time': start_date,
        'end_time': end_date,
        'max_results': max_results,
    }
    params['expansions'] = ",".join(expansions)
    params['tweet.fields'] = ",".join(tweet_fields)
    params['user.fields'] = ",".join(user_fields)
    params['place.fields'] = ",".join(place_fields)
    params['next_token'] = {}
    return endpoint, params

# Function to connect to the API
def connect_to_endpoint(url, headers, params, next_token = None, timeout = 60):
    """Send one GET request to the API and return the decoded JSON body.

    Args:
        url: Endpoint URL to query.
        headers: HTTP headers to send (e.g. the result of ``create_headers``).
        params: Query parameters (e.g. the dict returned by ``create_url``).
            The dict is not modified.
        next_token: Pagination token for the page to fetch, or ``None`` for
            the first page.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the download indefinitely.

    Returns:
        The response body parsed from JSON.

    Raises:
        Exception: If the response status code is not 200; the arguments are
            the status code and the response text.
    """
    # Work on a copy so the caller's params dict is left untouched.
    request_params = dict(params)
    if next_token is None:
        # Drop the placeholder entirely; no token means "first page".
        request_params.pop('next_token', None)
    else:
        request_params['next_token'] = next_token

    response = requests.get(url, headers=headers, params=request_params, timeout=timeout)
    print("Endpoint Response Code: " + str(response.status_code))
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

In [39]:
# Build the per-day request windows consumed by the download loop.
#
# The date_range arguments are written as month/day/year:
#   start: first day of the month
#   end:   first day of the next month
daterange = pd.date_range(start='01/01/2019', end='03/01/2019').strftime('%Y-%m-%dT%H:%M:%SZ')

# Window k runs from daterange[k] to daterange[k + 1], so the start list drops
# the last timestamp and the end list drops the first one.
start_list = list(daterange[:-1])
end_list = list(daterange[1:])

# Download tweets

In [None]:
bearer_token = creds.Bearer_Token    # Bearer token is saved in creds.py
headers = create_headers(bearer_token)
keyword = "cardano -is:retweet -giveaway -ethereum -bitcoin lang:en" # query to download cardano tweets

max_results = 500       # Tweets requested per API call
max_count = 1000        # Max tweets per time period

total_tweets = 0

# Download every time period, paging through the results of each one.
for start_date, end_date in zip(start_list, end_list):

    count = 0               # Tweets saved so far for this time period
    next_token = None       # No token: start from the first page

    while count < max_count:
        print("-------------------")
        print("Token: ", next_token)
        url = create_url(keyword, start_date, end_date, max_results)
        json_response = connect_to_endpoint(url[0], headers, url[1], next_token)

        meta = json_response.get('meta', {})
        result_count = meta.get('result_count') or 0
        # Token for the following page; absent on the final page.
        next_token = meta.get('next_token')
        if next_token is not None:
            print("Next Token: ", next_token)

        if result_count > 0:
            print("Start Date: ", start_date)
            # Name the file after THIS response's next token ("None" on the
            # final page). Reusing the token that requested the page would
            # give the final page the same name as the page before it and
            # overwrite that file.
            filename = (f'{start_date}'.replace(":", "-") + f'{next_token}' + '.json')
            with open(filename, 'w') as f:
                json.dump(json_response, f)                      # Create JSON for the downloaded data
            count += result_count
            total_tweets += result_count
            print("Total # of Tweets added: ", total_tweets)
            print("-------------------")
            time.sleep(3)

        # Pause before the next request, including the first request of the
        # next time period.
        time.sleep(3)

        # No next token means this was the final page for the time period.
        if next_token is None:
            break

print("Total number of results: ", total_tweets)




# References:

- https://towardsdatascience.com/an-extensive-guide-to-collecting-tweets-from-twitter-api-v2-for-academic-research-using-python-3-518fcb71df2a

- https://github.com/JustAnotherArchivist/snscrape/issues/459

- https://mihaelagrigore.medium.com/scraping-historical-tweets-with-twitter-api-v2-3f55e7263d33#4.-Scraping-rate-limits

- https://developer.twitter.com/