In [1]:
import snscrape.modules.twitter as sntwitter
import pandas as pd
import datetime as dt
from datetime import datetime
import pytz
from tqdm.notebook import tqdm

In [2]:
def scrap_twitter_single_search(bank, search_term, start_date, end_date):
    """Scrape English-language tweets for one search term into a dataframe.

    Parameters
    ----------
    bank
        Label written to the ``Bank`` column of every returned row.
    search_term
        Text placed at the front of the search query.
    start_date, end_date : str
        Dates in ``YYYY-MM-DD`` format, used for the query's ``since:`` and
        ``until:`` operators respectively.
        NOTE(review): Twitter's ``until:`` is presumably exclusive of the
        given day - confirm before relying on the end boundary.

    Returns
    -------
    pandas.DataFrame
        One row per tweet with the columns ``Datetime``, ``Tweet_Id``,
        ``Text``, ``Username``, ``Reply_Count``, ``Retweet_Count``,
        ``Like_Count`` and ``Bank``.

    Raises
    ------
    ValueError
        If either date string does not match ``YYYY-MM-DD``.
    """
    day_format = "%Y-%m-%d"

    # Parse both bounds as timezone-aware UTC datetimes so the lower bound
    # can be compared against each tweet's date.
    window_start = pytz.UTC.localize(datetime.strptime(start_date, day_format))
    window_end = pytz.UTC.localize(datetime.strptime(end_date, day_format))

    query = f'{search_term} since:{window_start:%Y-%m-%d} until:{window_end:%Y-%m-%d} lang:en'
    scraper = sntwitter.TwitterSearchScraper(query)

    records = []
    for tweet in scraper.get_items():
        # Stop at the first tweet older than the window start
        # (presumably results arrive newest-first - confirm).
        if tweet.date < window_start:
            break

        records.append((
            tweet.date,
            tweet.id,
            tweet.rawContent,
            tweet.user.username,
            tweet.replyCount,
            tweet.retweetCount,
            tweet.likeCount,
        ))

    column_names = ['Datetime', 'Tweet_Id', 'Text', 'Username', 'Reply_Count', 'Retweet_Count', 'Like_Count']

    # Tag every row with the bank this search belongs to.
    return pd.DataFrame(records, columns=column_names).assign(Bank=bank)

def scrap_twitter_multiple_search(bank_dict, start_date, end_date):
    """Scrape tweets for every search term of every bank and combine them.

    Parameters
    ----------
    bank_dict : dict
        Maps a bank label to the list of search terms to query for that bank.
    start_date, end_date : str
        Dates in ``YYYY-MM-DD`` format, forwarded unchanged to
        ``scrap_twitter_single_search``.

    Returns
    -------
    pandas.DataFrame
        All scraped tweets with a fresh 0..n-1 index and at most one row per
        (``Tweet_Id``, ``Bank``) pair. A tweet matched under two different
        banks is kept once for each bank. If there is nothing to search
        (empty ``bank_dict`` or no search terms), an empty dataframe is
        returned.
    """
    dfs = []

    # Loop through banks
    for bank, search_terms in tqdm(bank_dict.items(), total=len(bank_dict)):

        # Loop through each search term per bank
        for search_term in tqdm(search_terms, total=len(search_terms)):
            dfs.append(scrap_twitter_single_search(bank, search_term, start_date, end_date))

    # pd.concat raises ValueError on an empty list, so return early.
    if not dfs:
        return pd.DataFrame()

    # ignore_index avoids repeated index labels, since every per-term frame
    # starts at 0. Deduplicating on the tweet id (per bank) rather than on the
    # full row still catches a tweet returned by several search terms when its
    # engagement counts changed between requests.
    result = (
        pd.concat(dfs, ignore_index=True)
        .drop_duplicates(subset=['Tweet_Id', 'Bank'])
        .reset_index(drop=True)
    )

    return result

In [3]:
# Scrape window as YYYY-MM-DD strings; these feed the search query's
# since:/until: operators.
start_date = '2019-01-01'
end_date = '2019-01-02'

# Search terms to query for each bank. The key is the label that ends up in
# the "Bank" column of the scraped dataframe.
bank_dict = {
    'fnb': [
        'fnb',
        'FNBSA',
        'fnbSouthAfrica',
    ],
    'absa': [
        'absa',
        'absaSA',
        'ABSASouthAfrica',
    ],
    'nedbank': [
        'nedbank',
        'NEDBANKSA',
        'nedbankSouthAfrica',
    ],
    'capitec': [
        'capitec',
        'CapitecBank',
        'capitecSA',
    ],
    'standard_bank': [
        'standard bank',
        'standardbank',
        'StandardbankSA',
    ],
}

In [4]:
# Scrape every search term of every bank for the configured date window.
df = scrap_twitter_multiple_search(
    bank_dict=bank_dict,
    start_date=start_date,
    end_date=end_date,
)

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

In [5]:
# Preview the first five scraped tweets, ordered from most to least liked.
# Only these five rows are sorted - this is not the top five of the whole
# dataframe.
df.iloc[:5].sort_values(by='Like_Count', ascending=False)

Unnamed: 0,Datetime,Tweet_Id,Text,Username,Reply_Count,Retweet_Count,Like_Count,Bank
2,2019-01-01 22:29:20+00:00,1080229514712678400,Them Ⓜ️'s coming in I let 'em stack up 💰 Don't...,fnb_justo,0,1,11,fnb
0,2019-01-01 23:32:42+00:00,1080245458730184704,@SlowbucksAce Trippin 🤦🏽‍♂️ gotta give it to g...,fnb_justo,0,0,3,fnb
1,2019-01-01 22:36:31+00:00,1080231323552411650,"Fake news or nah, I needed to see that SMS fro...",BangDulamo_ZA,0,0,0,fnb
3,2019-01-01 21:55:39+00:00,1080221037395169280,FNB is so annoying with their unauthorized deb...,babyLangah,0,0,0,fnb
4,2019-01-01 21:38:11+00:00,1080216641684819968,@hothaata Forget the critics let's start 2019 ...,NavasExpert,0,0,0,fnb
