In [None]:
from twitter.search import Search
from twitter.util import find_key
import pandas as pd
import datetime

In [None]:
# Search expression sent to Twitter: a keyword followed by the `lang:` and
# `since:` operators, joined with single spaces.
query = ' '.join(('americanas', 'lang:pt', 'since:2023-09-25'))

In [None]:
# Build the scraper client from the accounts file. The remaining keyword
# arguments are forwarded to `Search` unchanged; judging by their names they
# cap collection per account and set a reset window -- TODO confirm against
# the `twitter` package.
search = Search(
    accounts_json_path="twitter_accounts.json",
    collection_limit_per_account=500,
    hours_to_reset_collection=12,
    debug=True,
)

# Run a single query in the 'Latest' category; the raw result is kept for
# parsing in a later cell.
latest_results = search.run(
    limit=10000,
    latest=True,
    retries=3,
    queries=[{'category': 'Latest', 'query': query}],
)

In [None]:
def get_tweets(data: list | dict):
    tweets = [y for x in data for y in x if not y.get('entryId').startswith('promoted')]

    df = (
        pd.json_normalize(find_key(tweets, 'tweet_results'), max_level=1)
        ['result.legacy'].apply(pd.Series)
        .dropna(subset='user_id_str')
        .assign(created_at=lambda x: pd.to_datetime(x['created_at'], format="%a %b %d %H:%M:%S %z %Y"))
        .sort_values('created_at', ascending=False)
        .reset_index(drop=True)
    )
    numeric = [
        'bookmark_count',
        'favorite_count',
        'quote_count',
        'reply_count',
        'retweet_count',
    ]

    df[numeric] = df[numeric].apply(pd.to_numeric, errors='coerce')
    
    df.rename(columns={'user_id_str': 'user_id', 
                       'id_str': 'id',
                       'conversation_id_str': 'conversation_id', 
                       'quoted_status_id_str': 'quoted_status_id', 
                       'in_reply_to_user_id_str': 'in_reply_to_user_id', 
                       'in_reply_to_status_id_str': 'in_reply_to_status_id'}, 
                       inplace=True)
    
    df['created_at'] = df['created_at'].dt.tz_convert(None).dt.strftime("%Y-%m-%d %H:%M:%S 00:00")

    return df

In [None]:
# Parse the raw search result into a DataFrame of tweets.
df = get_tweets(data=latest_results)

In [None]:
# Export to Excel. The file name is the first 15 characters of the query plus
# a timestamp. `datetime` is imported as a module in this notebook, so the
# class has to be reached as `datetime.datetime`; calling `datetime.now()` on
# the module raises AttributeError.
df.to_excel(f"{query[:15]}-{datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}.xlsx")

In [None]:
# Show the parsed tweets as this cell's output.
df