In [1]:
# Import packages
## tweepy - api to get data from twitter
import tweepy as tw
## hiding API keys - ref: https://youtu.be/CJjSOzb0IYs
import apikeys
## data manipulation
import pandas as pd
## date transformation
from datetime import datetime, timedelta

In [2]:
## Authentication Tokens
my_bearer = apikeys.bearer
my_key = apikeys.key
my_secret = apikeys.secret
my_token = apikeys.token
my_token_secret = apikeys.token_secret

## creating client object
client = tw.Client(
    bearer_token=my_bearer, 
    consumer_key=my_key, 
    consumer_secret=my_secret, 
    access_token=my_token, 
    access_token_secret=my_token_secret
    )

## creating API object
# authorization of consumer key and consumer secret
auth = tw.OAuthHandler(
    consumer_key=my_key, 
    consumer_secret=my_secret
    )
# set access to user's access key and access secret 
auth.set_access_token(my_token, my_token_secret)
# calling the api 
api = tw.API(auth)

In [3]:
# Replace with your own search query
query_list = ['@LulaOficial', '@jairbolsonaro', '@cirogomes', '@simonetebetbr']
query_filter = ' -is:retweet'

In [39]:
# Getting data from twitter

## all recent tweets mentions
counts_recent = client.get_recent_tweets_count(query=query_list[0], granularity='hour')
## recent tweets mentions without retweets
counts_recent_filtered = client.get_recent_tweets_count(query=query_list[0] + query_filter, granularity='hour')

## creating the dataframe and removing the first and the last range
df_all = pd.DataFrame(counts_recent[0], columns=['start', 'end', 'tweet_count']).rename(columns={"tweet_count": query_list[0] + '_mentions'})
## convert date columns from string to datetime
df_all['start'] = pd.to_datetime(df_all['start'], utc=True).map(lambda x: x.tz_convert('America/Bahia'))
df_all['end'] = pd.to_datetime(df_all['end'], utc=True).map(lambda x: x.tz_convert('America/Bahia'))
## split datetime into date and time
df_all['start_date'] = pd.to_datetime(df_all['start']).dt.date
df_all['start_time'] = pd.to_datetime(df_all['start']).dt.time
df_all['end_date'] = pd.to_datetime(df_all['end']).dt.date
df_all['end_time'] = pd.to_datetime(df_all['end']).dt.time
## reorder the columns sequence
df_all = df_all.loc[:, ['start', 'start_date', 'start_time', 'end', 'end_date', 'end_time', query_list[0] + '_mentions']]
## remove the first and the last line
df_all = df_all.iloc[1:len(df_all)-1].reset_index(drop=True)
## get tweet count without retweets
df_temp = pd.DataFrame(counts_recent_filtered[0], columns=['tweet_count']).rename(columns={"tweet_count": query_list[0] + '_mentions_without_retweet'}).iloc[1:len(df_all)-1].reset_index(drop=True)
df_all = pd.concat([df_all, df_temp], axis=1)
## check numeric columns type - fillna and convert to integer
df_all[query_list[0] + '_mentions'] = df_all[query_list[0] + '_mentions'].fillna(0).astype(int)
df_all[query_list[0] + '_mentions_without_retweet'] = df_all[query_list[0] + '_mentions_without_retweet'].fillna(0).astype(int)

## loop - get data about the top 4 candidates
for i in range(1, len(query_list)):
    
    ## all recent tweets mentions
    counts_recent = client.get_recent_tweets_count(query=query_list[i], granularity='hour')
    ## recent tweets mentions without retweets
    counts_recent_filtered = client.get_recent_tweets_count(query=query_list[i] + query_filter, granularity='hour')

    ## get tweet count with retweets
    df_temp = pd.DataFrame(counts_recent[0], columns=['tweet_count']).rename(columns={"tweet_count": query_list[i] + '_mentions'}).iloc[1:len(df_all)-1].reset_index(drop=True)
    df_all = pd.concat([df_all, df_temp], axis=1)
    
    ## get tweet count without retweets
    df_temp = pd.DataFrame(counts_recent_filtered[0], columns=['tweet_count']).rename(columns={"tweet_count": query_list[i] + '_mentions_without_retweet'}).iloc[1:len(df_all)-1].reset_index(drop=True)
    df_all = pd.concat([df_all, df_temp], axis=1)
    
    ## check numeric columns type - fillna and convert to integer
    df_all[query_list[i] + '_mentions'] = df_all[query_list[i] + '_mentions'].fillna(0).astype(int)
    df_all[query_list[i] + '_mentions_without_retweet'] = df_all[query_list[i] + '_mentions_without_retweet'].fillna(0).astype(int)

## final dataset
df_all

Unnamed: 0,start,start_date,start_time,end,end_date,end_time,@LulaOficial_mentions,@LulaOficial_mentions_without_retweet,@jairbolsonaro_mentions,@jairbolsonaro_mentions_without_retweet,@cirogomes_mentions,@cirogomes_mentions_without_retweet,@simonetebetbr_mentions,@simonetebetbr_mentions_without_retweet
0,2022-09-11 11:00:00-03:00,2022-09-11,11:00:00,2022-09-11 12:00:00-03:00,2022-09-11,12:00:00,7948,4315,7038,1867,2119,1074,271,163
1,2022-09-11 12:00:00-03:00,2022-09-11,12:00:00,2022-09-11 13:00:00-03:00,2022-09-11,13:00:00,7646,3910,11488,2813,2282,1096,309,224
2,2022-09-11 13:00:00-03:00,2022-09-11,13:00:00,2022-09-11 14:00:00-03:00,2022-09-11,14:00:00,7521,3747,10778,2561,1700,811,187,140
3,2022-09-11 14:00:00-03:00,2022-09-11,14:00:00,2022-09-11 15:00:00-03:00,2022-09-11,15:00:00,7191,3639,8604,2225,1498,812,231,172
4,2022-09-11 15:00:00-03:00,2022-09-11,15:00:00,2022-09-11 16:00:00-03:00,2022-09-11,16:00:00,7850,4084,7466,1834,1599,924,186,127
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
162,2022-09-18 05:00:00-03:00,2022-09-18,05:00:00,2022-09-18 06:00:00-03:00,2022-09-18,06:00:00,988,462,1231,338,169,73,23,14
163,2022-09-18 06:00:00-03:00,2022-09-18,06:00:00,2022-09-18 07:00:00-03:00,2022-09-18,07:00:00,2023,1022,2229,598,316,130,35,20
164,2022-09-18 07:00:00-03:00,2022-09-18,07:00:00,2022-09-18 08:00:00-03:00,2022-09-18,08:00:00,3474,1926,5435,1229,536,225,68,43
165,2022-09-18 08:00:00-03:00,2022-09-18,08:00:00,2022-09-18 09:00:00-03:00,2022-09-18,09:00:00,5012,0,0,0,0,0,0,0


In [None]:
# Replace with your own users id 
## @LulaOficial id = 2670726740
## @jairbolsonaro id = 128372940 
## @cirogomes id = 33374761
## @simonetebetbr id = 2508415207
user_id_list = [2670726740, 128372940, 33374761, 2508415207]

In [None]:
user = api.get_user(user_id=user_id_list[0])
user.followers_count
user.friends_count
user.statuses_count
user.listed_count
user.favourites_count

In [None]:
# Saving dataset
## last 5 days
#df_all.to_csv('count_data/from_' + df_all.loc[0, 'start'][0:10] + '_to_' + df_all.loc[len(df_all)-1, 'start'][0:10] + '.csv')
## only last day
#df_all.loc[len(df_all)-1:, :].to_csv('count_data/day_' + df_all.loc[len(df_all)-1, 'start'][0:10] + '.csv')