In [1]:
# import libraries
import pandas as pd
import requests
import os
import json
import csv
import time
from dotenv import load_dotenv
# For parsing the dates received from twitter in readable formats
import datetime
import dateutil.parser
import unicodedata

In [2]:
# Load environment variables from a .env file so that os.environ can serve the API credential below.
# The .env file must define the token, e.g. BEARER_TOKEN='PUT TWITTER BEARER TOKEN HERE'
# NOTE(review): the file is expected next to this notebook - confirm how load_dotenv() locates it.
load_dotenv()

True

In [3]:
# set headers for API request
def create_headers():
    """Build the HTTP headers that authenticate a Twitter API request.

    The bearer token is read from the ``BEARER_TOKEN`` environment variable.

    Returns:
        dict: A single ``Authorization`` header of the form ``Bearer <token>``.

    Raises:
        RuntimeError: If ``BEARER_TOKEN`` is unset or empty.
    """
    bearer_token = os.environ.get("BEARER_TOKEN")
    if not bearer_token:
        # Fail fast: without this check the header would literally read
        # "Bearer None" and the problem would only surface as an HTTP error later.
        raise RuntimeError(
            "BEARER_TOKEN is not set; define it in the environment or in the .env file"
        )
    return {"Authorization": "Bearer {}".format(bearer_token)}

In [5]:
# Function to generate URL and Parameters for API call
def create_url(keyword, start_time, end_time, max_results=10):
    """Build the endpoint URL and query parameters for a recent-search call.

    Args:
        keyword: Search query string, sent as the ``query`` parameter.
        start_time: Lower bound of the search window, sent as ``start_time``.
        end_time: Upper bound of the search window, sent as ``end_time``.
        max_results: Number of tweets requested per call (default 10).

    Returns:
        tuple: ``(search_url, query_params)`` where ``query_params`` is a new
        dict on every call. Its ``next_token`` entry is ``None`` until the
        caller fills in a pagination token.
    """
    search_url = "https://api.twitter.com/2/tweets/search/recent"

    query_params = {
        'query': keyword,
        'start_time': start_time,
        'end_time': end_time,
        'max_results': max_results,
        'expansions': 'author_id,in_reply_to_user_id,geo.place_id',
        'tweet.fields': 'id,text,author_id,in_reply_to_user_id,geo,conversation_id,created_at,lang,public_metrics,referenced_tweets,reply_settings,source',
        'user.fields': 'id,name,username,created_at,description,public_metrics,verified',
        'place.fields': 'full_name,id,country,country_code,geo,name,place_type',
        # None (rather than an empty dict) is the explicit "no token yet" value:
        # requests leaves None-valued parameters out of the query string.
        'next_token': None
    }
    return (search_url, query_params)

In [6]:
# Function to make GET request to desired endpoint
def connect_to_endpoint(url, headers, params, next_token = None):
    """Send a GET request to ``url`` and return the decoded JSON body.

    Args:
        url: Endpoint URL to call.
        headers: HTTP headers to send with the request.
        params: Query parameters. The dict is not modified; a copy is sent.
        next_token: Pagination token sent as the ``next_token`` parameter, or
            ``None`` to send none.

    Returns:
        The response body parsed as JSON.

    Raises:
        Exception: With ``(status_code, response_text)`` when the status code
            is not 200.
    """
    # Work on a copy so the caller's dict is not changed behind its back.
    request_params = dict(params)
    request_params['next_token'] = next_token
    # A timeout keeps the call from blocking forever if the server never answers;
    # requests raises its own timeout exception when it elapses.
    response = requests.request("GET", url, params=request_params, headers=headers, timeout=30)

    print("Endpoint Response Code: " + str(response.status_code))
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

In [7]:
#Inputs for the request
# What to search for, and the time window to search in
keyword = "ウーバーイーツ"
start_time, end_time = "2021-12-26T00:00:00.000Z", "2021-12-26T00:10:00.000Z"

# Number of tweets requested per API call
max_results = 15

# Credentials: the raw token from the environment and the request headers built from it
bearer_token = os.getenv("BEARER_TOKEN")
headers = create_headers()

In [8]:
# Call API
# Build the request once, send it, then echo what was sent for debugging
url = create_url(keyword, start_time, end_time, max_results)
json_response = connect_to_endpoint(url[0], headers, url[1])
print("url0: ",url[0])
# Show only the header names: printing the values would write the bearer
# token into the saved notebook output.
print("headers: ", {name: "<REDACTED>" for name in headers})
print("url1: ",url[1])

Endpoint Response Code: 200
url0:  https://api.twitter.com/2/tweets/search/recent
headers:  {'Authorization': '<REDACTED>'}
url1:  {'place.fields': 'full_name,id,country,country_code,geo,name,place_type', 'start_time': '2021-12-26T00:00:00.000Z', 'end_time': '2021-12-26T00:10:00.000Z', 'tweet.fields': 'id,text,author_id,in_reply_to_user_id,geo,conversation_id,created_at,lang,public_metrics,referenced_tweets,reply_settings,source', 'next_token': None, 'max_results': 15, 'query': 'ウーバーイーツ', 'user.fields': 'id,name,username,created_at,description,public_metrics,verified', 'expansions': 'author_id,in_reply_to_user_id,geo.place_id'}


In [9]:
# Function to make actual API call
def make_request(headers):
    """Run one fixed recent-search query and return the decoded JSON body.

    The query, time window and page size are hard-coded in this function;
    only the request headers come from the caller.

    Args:
        headers: HTTP headers to send with the request.

    Returns:
        The response body parsed as JSON.

    Raises:
        Exception: With ``(status_code, response_text)`` when the status code
            is not 200.
    """
    url = "https://api.twitter.com/2/tweets/search/recent"
    # No 'next_token' entry: this helper only ever fetches a single page.
    query_params = {
        'query': 'ウーバーイーツ',
        'start_time': "2021-12-26T00:00:00.000Z",
        'end_time': "2021-12-26T00:10:00.000Z",
        'max_results': 10,
        'place.fields': 'full_name,country',
    }
    # A timeout keeps the call from blocking forever if the server never answers.
    response = requests.request("GET", url, params=query_params, headers=headers, timeout=30)
    # Surface API errors here instead of handing an error payload to the caller.
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()
    
response = make_request(headers)
# Fall back to empty containers so a response without these keys
# (e.g. a search that matched nothing) does not raise KeyError.
data = response.get('data', [])
meta = response.get('meta', {})

In [23]:
# Create output csv file
# Write the header row only when result.csv is missing or empty, so that
# re-running this cell does not insert a second header between data rows.
header_needed = not os.path.isfile("result.csv") or os.path.getsize("result.csv") == 0

# The with-block closes the file even if writing fails
with open("result.csv", "a", newline="", encoding='utf-8') as csvFile:
    csvWriter = csv.writer(csvFile)

    # Column headers for the fields saved per tweet; the order matches the rows written by append_to_csv
    if header_needed:
        csvWriter.writerow(['author id', 'created_at', 'geo', 'id','lang', 'like_count', 'quote_count', 'reply_count','retweet_count','source','tweet'])

In [24]:
def append_to_csv(json_response, fileName):
    """Append one CSV row per tweet in ``json_response`` to ``fileName``.

    Each row holds, in order: author id, creation time, geo place id,
    tweet id, language, like count, quote count, reply count, retweet count,
    source and tweet text. Optional fields that are absent are written as a
    single space.

    Args:
        json_response: Decoded search response. Tweets are read from its
            ``data`` list; a response without ``data`` writes no rows.
        fileName: Path of the CSV file to append to (created if missing).

    Returns:
        None. The number of rows written is printed.
    """
    # Number of rows written from this response
    counter = 0

    # The with-block closes the file even if a tweet fails to parse midway
    with open(fileName, "a", newline="", encoding='utf-8') as csvFile:
        csvWriter = csv.writer(csvFile)

        for tweet in json_response.get('data', []):
            # Some keys might not exist for some tweets, so optional ones use
            # fallbacks instead of direct indexing.

            # 1. Author ID
            author_id = tweet['author_id']

            # 2. Time created, parsed from the timestamp string into a datetime
            created_at = dateutil.parser.parse(tweet['created_at'])

            # 3. Geolocation: 'geo' may be absent, and may lack a place_id
            geo = (tweet.get('geo') or {}).get('place_id', " ")

            # 4. Tweet ID
            tweet_id = tweet['id']

            # 5. Language
            lang = tweet.get('lang', " ")

            # 6. Tweet metrics
            metrics = tweet['public_metrics']
            retweet_count = metrics['retweet_count']
            reply_count = metrics['reply_count']
            like_count = metrics['like_count']
            quote_count = metrics['quote_count']

            # 7. Source
            source = tweet.get('source', " ")

            # 8. Tweet text
            text = tweet['text']

            # Column order must stay in sync with the header row of the CSV
            csvWriter.writerow([author_id, created_at, geo, tweet_id, lang, like_count, quote_count, reply_count, retweet_count, source, text])
            counter += 1

    # Print the number of tweets for this iteration
    print("# of Tweets added from this response: ", counter)

In [25]:
# Converting to dataframe for friendly view
def make_df(response):
    """Turn the tweets of a decoded search response into a DataFrame.

    Args:
        response: Decoded response dict; tweets are read from its ``data`` list.

    Returns:
        pandas.DataFrame: One row per entry in ``data``, or an empty frame
        when the response has no ``data`` key.
    """
    return pd.DataFrame(response.get('data', []))

In [27]:
# Pagination for tweets over results > 100
total_tweets = 0      # tweets saved so far, across all pages
max_results = 100     # tweets requested per page

count = 0             # pages requested so far
max_count = 100       # hard cap on pages, so the loop always terminates
flag = True           # stays True while another page is available
next_token = None     # pagination token for the next page (None = first page)
page_frames = []      # one DataFrame per non-empty page

# URL and query parameters are the same for every page; only the token changes
url = create_url(keyword, start_time, end_time, max_results)

while flag and count < max_count:
    print("-----------------")
    print("Token: ", next_token)
    print(count)
    json_response = connect_to_endpoint(url[0], headers, url[1], next_token)
    count += 1
    result_count = json_response['meta']['result_count']

    # Save this page whether or not more pages follow
    if result_count is not None and result_count > 0:
        append_to_csv(json_response, "result.csv")
        page_frames.append(make_df(json_response))
        total_tweets += result_count
        print("Total # of Tweets added: ", total_tweets)
        print("-----------------")

    # A missing next_token in the response metadata means this was the last page
    next_token = json_response['meta'].get('next_token')
    if next_token is None:
        flag = False
    else:
        print("Next Token: ", next_token)
        # Pause between consecutive requests
        time.sleep(5)

print("Total number of results: " , total_tweets)

# Combine every page; an empty frame keeps `df` defined when nothing matched
df = pd.concat(page_frames, ignore_index=True) if page_frames else pd.DataFrame()
df

-----------------
Token:  None
0
Endpoint Response Code: 200
-----------------
# of Tweets added from this response:  46
Total # of Tweets added:  46
-----------------
Total number of results:  46


Unnamed: 0,author_id,conversation_id,created_at,id,in_reply_to_user_id,lang,public_metrics,referenced_tweets,reply_settings,source,text
0,1369942870136741894,1474893749155250176,2021-12-26T00:09:47.000Z,1474895179333406723,1.3995587307237294e+18,ja,"{'quote_count': 0, 'retweet_count': 0, 'reply_...","[{'type': 'replied_to', 'id': '147489374915525...",everyone,Twitter for iPhone,@87__riyu あらおいしそう😋\n\nウーバーイーツで届けてもらえるかな？😝
1,134317962,1474895156386349058,2021-12-26T00:09:42.000Z,1474895156386349058,,ja,"{'quote_count': 0, 'retweet_count': 0, 'reply_...",,everyone,feather for iOS,日曜日なのにこれ、ウーバーイーツもう駄目だろ https://t.co/ytq2yQYL7f
2,995834500687872001,1474894978664960002,2021-12-26T00:08:59.000Z,1474894978664960002,,ja,"{'quote_count': 0, 'retweet_count': 0, 'reply_...",,everyone,Twitter Web App,朝から編集が立て込んでいるのでウーバーイーツ頼もうかなと思ったけど…\n高くつくと思ったから...
3,1347927411107614720,1474894922939781122,2021-12-26T00:08:46.000Z,1474894922939781122,,ja,"{'quote_count': 0, 'retweet_count': 0, 'reply_...",,everyone,twittbot.net,・2021年1月最新版・\n\neats-4kub32\n\n上記プロモーションコード使用で...
4,1343587218028331008,1474894889238556674,2021-12-26T00:08:38.000Z,1474894889238556674,,ja,"{'quote_count': 0, 'retweet_count': 0, 'reply_...",,everyone,twittbot.net,♥【期間限定】UberEatsの高額割引クーポン配布♥ 初回注文時に利用可！ 会計の際にプロ...
5,1244066887870234624,1474894868711624705,2021-12-26T00:08:33.000Z,1474894868711624705,,ja,"{'quote_count': 0, 'retweet_count': 0, 'reply_...",,everyone,twittbot.net,い\n友達招待コード クーポン 割引券\nプロモーションコード\nメルカリ「FPXFFY」\...
6,1196700167371550720,1474894858951307267,2021-12-26T00:08:31.000Z,1474894858951307267,,ja,"{'quote_count': 0, 'retweet_count': 0, 'reply_...",,everyone,Botbird tweets,甲府、鳥取、松江、福岡、福井でウーバーイーツ配達員になりたい人いませんか❓\n\n今なら2万...
7,1455881867111002119,1474894854866243584,2021-12-26T00:08:30.000Z,1474894854866243584,,ja,"{'quote_count': 0, 'retweet_count': 0, 'reply_...",,everyone,twittbot.net,主婦さんも〜ウーバーイーツ配達は如何でしょうか！招待コード「ag31nnsvpvfe」と入力...
8,1451181307183046660,1474894845227696129,2021-12-26T00:08:27.000Z,1474894845227696129,,ja,"{'quote_count': 0, 'retweet_count': 0, 'reply_...",,everyone,twittbot.net,食欲の冬！ウーバーイーツデビュー！！\nUber Eats初回限定クーポン発行中！\n\n✅...
9,1282954868487892992,1474894844657270787,2021-12-26T00:08:27.000Z,1474894844657270787,,ja,"{'quote_count': 0, 'retweet_count': 0, 'reply_...",,everyone,twittbot.net,☆☆☆配達員が不足しがちな下記エリアにて、ウーバーイーツ配達員を募集します 広島市、仙台市...


In [14]:
# Save the collected tweets; index=False leaves out the positional row index,
# which would otherwise be written as an extra unnamed first column.
df.to_csv('df.csv', index=False)