In [None]:
import requests
import json
import time
import random
import os
import pandas as pd

In [11]:
from dotenv import load_dotenv

load_dotenv()

BEARERTOKEN = os.getenv('BEARERTOKEN')

Source Code : https://medium.com/data-analytics-at-nesta/all-you-need-to-get-started-with-twitter-api-v2-using-python-6cd4be4d90fe

In [12]:
endpoint_url = "https://api.twitter.com/2/tweets/search/recent"

In [13]:
query_parameters = {
    "query": '("makan bergizi gratis" OR "makan siang gratis") lang:id -is:retweet',
    "tweet.fields": "id,text,author_id,created_at",
    "max_results": 10,
}

In [14]:
def request_headers(bearer_token: str) -> dict:
    """
    Sets up the request headers. 
    Returns a dictionary summarising the bearer token authentication details.
    """
    return {"Authorization": "Bearer {}".format(bearer_token)}

headers = request_headers(BEARERTOKEN)

In [None]:
def connect_to_endpoint(endpoint_url: str, headers: dict, parameters: dict) -> json:
    """
    Connects to the endpoint and requests data.
    Returns a json with Twitter data if a 200 status code is yielded.
    Programme stops if there is a problem with the request and sleeps
    if there is a temporary problem accessing the endpoint.
    """
    response = requests.request(
        "GET", url=endpoint_url, headers=headers, params=parameters
    )
    response_status_code = response.status_code
    if response_status_code != 200:
        if response_status_code >= 400 and response_status_code < 500:
            raise Exception(
                "Cannot get data, the program will stop!\nHTTP {}: {}".format(
                    response_status_code, response.text
                )
            )
        
        sleep_seconds = random.randint(5, 60)
        print(
            "Cannot get data, your program will sleep for {} seconds...\nHTTP {}: {}".format(
                sleep_seconds, response_status_code, response.text
            )
        )
        time.sleep(sleep_seconds)
        return connect_to_endpoint(endpoint_url, headers, parameters)
    return response.json()

#uncomment to execute
#json_response = connect_to_endpoint(endpoint_url, headers, query_parameters)

In [23]:
rules = [
    {"value": '("makan bergizi gratis" OR "makan siang gratis") lang:id -is:retweet', "tag": "mbg"},
]

In [27]:
def process_twitter_data(
    json_response: json,
    query_tag: str,
    tweets_data: pd.DataFrame,
    users_data: pd.DataFrame,
) -> (pd.DataFrame, pd.DataFrame):
    """
    Adds new tweet/user information to the table of
    tweets/users and saves dataframes as pickle files,
    if data is avaiable.
    
    Returns the tweets and users updated dataframes.
    """
    if "data" in json_response.keys():
        new = pd.DataFrame(json_response["data"])
        tweets_data = pd.concat([tweets_data, new])
        tweets_data.to_pickle("tweets_" + query_tag + ".pkl")

        if "users" in json_response["includes"].keys():
            new = pd.DataFrame(json_response["includes"]["users"])
            users_data = pd.concat([users_data, new])
            users_data.drop_duplicates("id", inplace=True)
            users_data.to_pickle("users_" + query_tag + ".pkl")

    return tweets_data, users_data

In [28]:
query_parameters = {
    "tweet.fields": "id,text,author_id,created_at",
    "user.fields": "id,name,username,created_at,description,location,verified",
    "expansions": "author_id",
    "max_results": 20,
}

In [35]:
tweets_data = pd.DataFrame()
users_data = pd.DataFrame()

for i in range(len(rules)):
    query_parameters["query"] = rules[i]["value"]
    query_tag = rules[i]["tag"]

    json_response = connect_to_endpoint(endpoint_url, headers, query_parameters)
    tweets_data, users_data = process_twitter_data(
        json_response, query_tag, tweets_data, users_data
    )

    time.sleep(10)

    while "next_token" in json_response["meta"]:
        query_parameters["next_token"] = json_response["meta"]["next_token"]

        json_response = connect_to_endpoint(endpoint_url, headers, query_parameters)
        tweets_data, users_data = process_twitter_data(
            json_response, query_tag, tweets_data, users_data
        )

        time.sleep(10)

Exception: Cannot get data, the program will stop!
HTTP 429: {"title":"Too Many Requests","detail":"Too Many Requests","type":"about:blank","status":429}

In [40]:
json_response

{'data': [{'id': '1879968968250913076',
   'edit_history_tweet_ids': ['1879968968250913076'],
   'created_at': '2025-01-16T19:08:24.000Z',
   'text': 'ini progam makan siang gratis kok cuma buat anak sekolah aj, kita juga ini yg udh lulus laper',
   'author_id': '1451200657336123404'},
  {'id': '1879968672929964270',
   'edit_history_tweet_ids': ['1879968672929964270'],
   'created_at': '2025-01-16T19:07:14.000Z',
   'text': '@jwaluyo Bukan Sekadar Makanan: Kisah Inspiratif di Balik Senyum Bahagia Anak Papua Pegunungan Pada Program Makan Bergizi Gratis Dari Prabowo\n\nhttps://t.co/9DCcAS1kWM',
   'author_id': '833782256837681152'},
  {'id': '1879968626616476010',
   'edit_history_tweet_ids': ['1879968626616476010'],
   'created_at': '2025-01-16T19:07:03.000Z',
   'text': 'Bukan Sekadar Makanan: Kisah Inspiratif di Balik Senyum Bahagia Anak Papua Pegunungan Pada Program Makan Bergizi Gratis Dari Prabowo\n\nhttps://t.co/9DCcAS1kWM https://t.co/RSHEJMXdvF',
   'author_id': '83378225683768

In [39]:
## ganti nama file

with open('jsonresponse2.json', 'w') as fp: 
    json.dump(json_response, fp)

In [60]:
resultdf = pd.DataFrame(columns=['id', 'edit_history_tweet_ids', 'created_at', 'text', 'author_id'])

for i in range(len(json_response['data'])):
    #print(json_response['data'][i])
    insertdf = pd.DataFrame(json_response['data'][i])
    resultdf = pd.concat([resultdf,insertdf])

In [61]:
resultdf

Unnamed: 0,id,edit_history_tweet_ids,created_at,text,author_id
0,1879968968250913076,1879968968250913076,2025-01-16T19:08:24.000Z,ini progam makan siang gratis kok cuma buat an...,1451200657336123404
0,1879968672929964270,1879968672929964270,2025-01-16T19:07:14.000Z,@jwaluyo Bukan Sekadar Makanan: Kisah Inspirat...,833782256837681152
0,1879968626616476010,1879968626616476010,2025-01-16T19:07:03.000Z,Bukan Sekadar Makanan: Kisah Inspiratif di Bal...,833782256837681152
0,1879967261714456712,1879967261714456712,2025-01-16T19:01:37.000Z,@CNNIndonesia Hadeeeehh ada aja yak ... Mendin...,1603521046446895105
0,1879967191539556551,1879967191539556551,2025-01-16T19:01:21.000Z,@MurtadhaOne1 Mungkin Nanti pada saatnya akan ...,109483175
0,1879967111990374652,1879967111990374652,2025-01-16T19:01:02.000Z,@sinnworld Tapi menurutku better than makan si...,1606830366668787712
0,1879966047933198563,1879966047933198563,2025-01-16T18:56:48.000Z,@CNNIndonesia Pasukan nasi bungkus disuruh kaw...,1690677138662584320
0,1879965821134610557,1879965821134610557,2025-01-16T18:55:54.000Z,@soojain @soo__cats halo kak alana! kayanya si...,758240628526297094
0,1879965520793108727,1879965520793108727,2025-01-16T18:54:42.000Z,"@Yurissa_Samosir @kumparan paling bener dpd, d...",321198410
0,1879965432985317824,1879965432985317824,2025-01-16T18:54:21.000Z,@CNNIndonesia Wajarlah kalo usulan ketua DPD t...,1226875580
