* Author: Limor Gultchin
* Date: Oct 13th, 2023
* Goal: Scrape the tweets of a target user using Twitter's official API

In [29]:
import requests
import os
import json
import pandas as pd

# TODO: specify bearer token
# Option 1 (preferred): set the environment variable in your terminal before
# starting Jupyter:
#   export BEARER_TOKEN='<your_bearer_token>'
# OR
# Option 2: replace the empty-string fallback below (do not commit a real token)
bearer_token = os.environ.get("BEARER_TOKEN", "")
# TODO: specify user_id of target twitter handle
# None is the "not set yet" placeholder; numpy is not imported in this notebook,
# so the previous np.nan placeholder raised NameError on a fresh kernel.
user_id = None

def create_url(user_id):
    """Return the tweets endpoint URL for ``user_id``.

    The ID is interpolated as-is into
    ``https://api.twitter.com/2/users/<user_id>/tweets``; no validation is done.
    """
    return f"https://api.twitter.com/2/users/{user_id}/tweets"


def get_params(start_time, end_time):
    """Build the query parameters for one tweets request.

    Args:
        start_time: lower bound of the time window, passed through unchanged.
        end_time: upper bound of the time window, passed through unchanged.

    Returns:
        dict with the keys ``tweet.fields``, ``start_time``, ``end_time`` and
        ``max_results`` (fixed at 100), in that order.
    """
    # "tweet.fields" is adjustable. Options include:
    #   attachments, author_id, context_annotations, conversation_id,
    #   created_at, entities, geo, id, in_reply_to_user_id, lang,
    #   non_public_metrics, organic_metrics, possibly_sensitive,
    #   promoted_metrics, public_metrics, referenced_tweets, source, text,
    #   and withheld
    query = {"tweet.fields": "created_at"}
    query["start_time"] = start_time
    query["end_time"] = end_time
    query["max_results"] = 100
    return query


def bearer_oauth(r):
    """
    Auth hook for bearer-token authentication.

    Sets the ``Authorization`` header from the module-level ``bearer_token``
    and a fixed ``User-Agent`` on the request ``r``, then returns the same
    object.
    """
    auth_headers = {
        "Authorization": "Bearer {}".format(bearer_token),
        "User-Agent": "v2UserTweetsPython",
    }
    r.headers.update(auth_headers)
    return r


def connect_to_endpoint(url, params, timeout=30, max_retries=3):
    """GET ``url`` with bearer authentication and return the decoded JSON body.

    Args:
        url: endpoint URL to request.
        params: dict of query parameters sent with the request.
        timeout: seconds to wait for the server before giving up; without a
            timeout a stalled connection would block the kernel indefinitely.
        max_retries: how many times to wait and retry after an HTTP 429
            (rate limited) response. Pass 0 to fail immediately on 429.

    Returns:
        The parsed JSON response.

    Raises:
        Exception: if the final response status is not 200. The message
            contains the status code and the response text.
    """
    import time  # local import: this notebook has no top-level `import time`

    attempt = 0
    while True:
        response = requests.request(
            "GET", url, auth=bearer_oauth, params=params, timeout=timeout
        )
        print(response.status_code)
        if response.status_code == 200:
            return response.json()

        if response.status_code == 429 and attempt < max_retries:
            attempt += 1
            # The API reports when the rate-limit window resets as epoch
            # seconds in `x-rate-limit-reset`; fall back to exponential
            # backoff when the header is missing or unparsable.
            try:
                reset_at = float(response.headers.get("x-rate-limit-reset", ""))
                wait_seconds = max(reset_at - time.time(), 0) + 1
            except (TypeError, ValueError):
                wait_seconds = 2 ** attempt
            print(
                f"rate limited; waiting {wait_seconds:.0f}s before retry "
                f"{attempt}/{max_retries}"
            )
            time.sleep(wait_seconds)
            continue

        raise Exception(
            "Request returned an error: {} {}".format(
                response.status_code, response.text
            )
        )


def main(start_time, end_time, user_id):
    """Fetch one time window of a user's tweets, print it, and return it.

    Builds the URL with ``create_url`` and the query with ``get_params``,
    performs the request through ``connect_to_endpoint``, pretty-prints the
    JSON response (non-ASCII text kept readable) and returns the response.
    """
    payload = connect_to_endpoint(
        create_url(user_id),
        get_params(start_time, end_time),
    )
    pretty = json.dumps(payload, indent=4, sort_keys=True, ensure_ascii=False)
    print(pretty)
    return payload

In [32]:
# Scrape each time window separately, save one csv per window, then save the
# concatenation of all windows.
dfs = []
times = [("2023-10-07T04:30:00+00:00", "2023-10-07T12:30:00+00:00"), 
         ("2023-10-07T12:30:01+00:00", "2023-10-07T20:30:00+00:00"), 
         ("2023-10-07T20:30:01+00:00", "2023-10-08T04:30:00+00:00"),
         ("2023-10-08T04:30:01+00:00", "2023-10-08T12:30:00+00:00"),
         ("2023-10-08T12:30:01+00:00", "2023-10-08T20:30:00+00:00"),
         ("2023-10-08T20:30:01+00:00", "2023-10-09T04:30:00+00:00")]
# loop variable is `window`, not `time`, to avoid shadowing the stdlib module name
for window in times:
    start_time, end_time = window
    print(f"processing {window}")
    response = main(start_time, end_time, user_id)

    # A window with no tweets comes back without a "data" key; skip it instead
    # of crashing with KeyError.
    tweets = response.get("data", [])
    if not tweets:
        print(f"no tweets returned for {window}; skipping")
        continue

    # Only the first page of results is requested; a next_token in the response
    # metadata means this window holds more tweets than were saved.
    if response.get("meta", {}).get("next_token"):
        print(f"warning: results for {window} are truncated; use shorter windows")

    df = pd.DataFrame(tweets)
    # [:19] trims the UTC offset so the file name holds only the timestamp
    df.to_csv(f"id{user_id}_{start_time[:19]}_{end_time[:19]}.csv")
    dfs.append(df)

# concatenate all dfs (pd.concat raises on an empty list, so guard it)
if dfs:
    concatenated_df = pd.concat(dfs)
    concatenated_df.to_csv(f"id_{user_id}_all.csv")
else:
    concatenated_df = pd.DataFrame()
    print("no tweets collected; nothing to save")
    

processing ('2023-10-07T04:30:00+00:00', '2023-10-07T12:30:00+00:00')
200
{
    "data": [
        {
            "created_at": "2023-10-07T12:28:38.000Z",
            "edit_history_tweet_ids": [
                "1710633227247841428"
            ],
            "id": "1710633227247841428",
            "text": "אחד השכנים מצלם. כאמור נראה צהל אבל בגלל שלא מתקשרים למספר מדווח אין לנו איך לדעת https://t.co/ojEIxWHkcy"
        },
        {
            "created_at": "2023-10-07T12:27:28.000Z",
            "edit_history_tweet_ids": [
                "1710632933680177521"
            ],
            "id": "1710632933680177521",
            "text": "ניסן מדווח https://t.co/I34VMO0sk0"
        },
        {
            "created_at": "2023-10-07T12:26:47.000Z",
            "edit_history_tweet_ids": [
                "1710632764880433635"
            ],
            "id": "1710632764880433635",
            "text": "שריקות בחוץ"
        },
        {
            "created_at": "2023-10-07T12:21:57.000Z",


Exception: Request returned an error: 429 {"title":"Too Many Requests","detail":"Too Many Requests","type":"about:blank","status":429}

In [33]:
# Read in all per-window csv files in path, concatenate and save final
# set path to directory containing csv files
# TODO: replace with path where scrapes are saved
# "." is the current directory, which is where the scraping cell writes its
# files; an empty string would make os.listdir raise FileNotFoundError.
path = "."

# name of the combined file written at the end of this cell
final_csv = "final.csv"

# create empty list to store dataframes
dfs = []

# loop through each file in the directory; sorted() gives a reproducible row
# order because os.listdir returns entries in arbitrary order
for file in sorted(os.listdir(path)):
    # check if file is a csv file
    if not file.endswith(".csv"):
        continue
    file_path = os.path.join(path, file)
    # skip aggregate files (the scraping cell's "*_all.csv" and this cell's
    # own output from an earlier run) so that no tweet is counted twice
    if file.endswith("_all.csv") or os.path.abspath(file_path) == os.path.abspath(final_csv):
        continue
    # read csv file into a dataframe and collect it
    dfs.append(pd.read_csv(file_path))

if not dfs:
    raise ValueError(f"no scraped csv files found in {path!r}")

# concatenate all dataframes into one
final_df = pd.concat(dfs, ignore_index=True)

# The per-window files were saved with their index, which read_csv loads as an
# "Unnamed: 0" column; drop it so it does not leak into the final file.
final_df = final_df.drop(columns=["Unnamed: 0"], errors="ignore")

# save final dataframe to csv file
final_df.to_csv(final_csv, index=False)