# Scraping Market News from Twitter
So all major finance news sources have twitter accounts. In fact even one news institution may have multiple accounts. For example, Financial Times has @FT, @FinancialTimes, @FTMarkets, @fteconomics etc. <br><br>  So I decided to scrape the following market related accounts:
1. @FTMarkets - Financial Times Markets (21.2K followers)
2. @markets - Bloomberg Markets (987.8K followers)
3. @WSJbusiness - Wall Street Journal Business (1.6M followers)
4. @ReutersBiz - Reuters Business (2.2M followers)
5. @BBCBusiness - BBC Business (1.9M followers)
6. @CNBC - CNBC (4.1M followers)
7. @TheEconomist - The Economist (main account) (25.7M followers) [optional]

In [63]:
import pandas as pd
import itertools
from datetime import timedelta, date

from snscrape.modules import twitter
import os
import time
from tqdm.notebook import tqdm, trange

## How snscrape works?

In [20]:
# # simple test 
# keyword = 'from:@FT since:2021-04-01 until:2021-04-02'
# scraped_tweets = twitter.TwitterSearchScraper(keyword).get_items()
# sliced_scraped_tweets = itertools.islice(scraped_tweets, 10000)

## Build my own Scraper

In [28]:
tar_acs = {
    'ftmarkets': '@FTMarkets',
    'bloombergmarkets': '@markets',
    'wsjbusiness': '@WSJbusiness',
    'reutersbusiness': '@ReutersBiz',
    #'theeconomist': '@TheEconomist',
    'bbcbusiness': '@BBCBusiness',
    'cnbc': '@CNBC'
}

In [33]:
# ensure data director folders exist
data_dir = 'data/twitter/'
for name in tar_acs.keys():
    tar_dir = os.path.join(data_dir, name)
    if not os.path.isdir(tar_dir):
        os.mkdir(tar_dir)

In [64]:
# function to return a range of dates
def daterange(start_date, end_date):
    for n in range(int((end_date - start_date).days)):
        yield start_date + timedelta(n)

# function to scrape twitter posts in general from a specific account
def scrape_twitter(target, start_date, end_date, limit=10000):
    query_str = f'from:{tar_acs[target]} since:{str(start_date)} until:{str(end_date)}'
    twt_generator = twitter.TwitterSearchScraper(keyword).get_items()
    sliced_scraped_tweets = itertools.islice(twt_generator, limit)
    return pd.DataFrame(sliced_scraped_tweets)

# function to scrape twitter posts in batch from a specific account
def scrape_twitter_batch_and_save(target, start_date, end_date, wait_time=10, daily_limit=10000):
    dates = [single_date for single_date in daterange(start_date, end_date+timedelta(1))]
    for i in trange(len(dates)):
        df = scrape_twitter(target, dates[i], dates[i+1], limit=daily_limit)
        file_name = f'{target}_{str(start_date)}_{str(end_date)}.csv'
        df.to_csv(os.path.join(data_dir, target, file_name), index=None)
        time.sleep(wait_time)

In [41]:
# quick test 
df = scrape_twitter('ftmarkets', date(2021, 1, 1), date(2021, 1, 2))
df.head()

Unnamed: 0,url,date,content,renderedContent,id,user,outlinks,tcooutlinks,replyCount,retweetCount,...,quoteCount,conversationId,lang,source,sourceUrl,sourceLabel,media,retweetedTweet,quotedTweet,mentionedUsers
0,https://twitter.com/FT/status/1378106329764663299,2021-04-02 22:05:27+00:00,Greece and Turkey top Europe destinations for ...,Greece and Turkey top Europe destinations for ...,1378106329764663299,"{'username': 'FT', 'displayname': 'Financial T...",[https://on.ft.com/39ULhRZ],[https://t.co/sSuRtGfXNl],19,40,...,4,1378106329764663299,en,"<a href=""http://www.socialflow.com"" rel=""nofol...",http://www.socialflow.com,SocialFlow,,,,
1,https://twitter.com/FT/status/1378091582075899912,2021-04-02 21:06:51+00:00,Lessons for an improvised life https://t.co/JN...,Lessons for an improvised life on.ft.com/3dz3w...,1378091582075899912,"{'username': 'FT', 'displayname': 'Financial T...",[https://on.ft.com/3dz3wgN],[https://t.co/JNVGr1il07],6,16,...,1,1378091582075899912,en,"<a href=""http://www.socialflow.com"" rel=""nofol...",http://www.socialflow.com,SocialFlow,,,,
2,https://twitter.com/FT/status/1378075869562953742,2021-04-02 20:04:25+00:00,Independent Scotland would face a large hole i...,Independent Scotland would face a large hole i...,1378075869562953742,"{'username': 'FT', 'displayname': 'Financial T...",[https://on.ft.com/3fBt7rT],[https://t.co/NsMCrwR1TH],91,209,...,43,1378075869562953742,en,"<a href=""http://www.socialflow.com"" rel=""nofol...",http://www.socialflow.com,SocialFlow,,,,
3,https://twitter.com/FT/status/1378071687728615432,2021-04-02 19:47:48+00:00,Europe’s ports set for wave of activity as Sue...,Europe’s ports set for wave of activity as Sue...,1378071687728615432,"{'username': 'FT', 'displayname': 'Financial T...",[https://on.ft.com/3wkmr7r],[https://t.co/hJl4IkWgjB],4,21,...,0,1378071687728615432,en,"<a href=""http://www.socialflow.com"" rel=""nofol...",http://www.socialflow.com,SocialFlow,,,,
4,https://twitter.com/FT/status/1378048453234864128,2021-04-02 18:15:28+00:00,US Capitol building locked down after vehicle ...,US Capitol building locked down after vehicle ...,1378048453234864128,"{'username': 'FT', 'displayname': 'Financial T...",[https://on.ft.com/3drVxlq],[https://t.co/eHZGMQ1lzE],6,8,...,4,1378048453234864128,en,"<a href=""http://www.socialflow.com"" rel=""nofol...",http://www.socialflow.com,SocialFlow,,,,
5,https://twitter.com/FT/status/1378043951794556932,2021-04-02 17:57:35+00:00,‘Covid passports’ to be put to the test at big...,‘Covid passports’ to be put to the test at big...,1378043951794556932,"{'username': 'FT', 'displayname': 'Financial T...",[https://on.ft.com/3mneSZe],[https://t.co/3wLHv54d2w],11,14,...,6,1378043951794556932,en,"<a href=""http://www.socialflow.com"" rel=""nofol...",http://www.socialflow.com,SocialFlow,,,,
6,https://twitter.com/FT/status/1378038821892292613,2021-04-02 17:37:12+00:00,Biden offers support to Ukraine and warns of R...,Biden offers support to Ukraine and warns of R...,1378038821892292613,"{'username': 'FT', 'displayname': 'Financial T...",[https://on.ft.com/3sHvTQ2],[https://t.co/fgKDtzNAwn],13,22,...,7,1378038821892292613,en,"<a href=""http://www.socialflow.com"" rel=""nofol...",http://www.socialflow.com,SocialFlow,,,,
7,https://twitter.com/FT/status/1378035492462211075,2021-04-02 17:23:58+00:00,Turkey fines foreign banks for ‘short selling ...,Turkey fines foreign banks for ‘short selling ...,1378035492462211075,"{'username': 'FT', 'displayname': 'Financial T...",[https://on.ft.com/3uerrss],[https://t.co/XPosRn7UvG],4,22,...,4,1378035492462211075,en,"<a href=""http://www.socialflow.com"" rel=""nofol...",http://www.socialflow.com,SocialFlow,,,,
8,https://twitter.com/FT/status/1378030389260865540,2021-04-02 17:03:42+00:00,‘They can do what they want’: Archegos and the...,‘They can do what they want’: Archegos and the...,1378030389260865540,"{'username': 'FT', 'displayname': 'Financial T...",[https://on.ft.com/3cIn6rp],[https://t.co/lFXvfH0uZH],7,20,...,6,1378030389260865540,en,"<a href=""http://www.socialflow.com"" rel=""nofol...",http://www.socialflow.com,SocialFlow,,,,
9,https://twitter.com/FT/status/1378027475364634624,2021-04-02 16:52:07+00:00,Will Shu: Deliveroo’s shaken but undeterred fo...,Will Shu: Deliveroo’s shaken but undeterred fo...,1378027475364634624,"{'username': 'FT', 'displayname': 'Financial T...",[https://on.ft.com/3mdeV9X],[https://t.co/W6B9ro58g7],2,4,...,1,1378027475364634624,en,"<a href=""http://www.socialflow.com"" rel=""nofol...",http://www.socialflow.com,SocialFlow,,,,


In [65]:
scrape_twitter_batch_and_save('ftmarkets', date(2021, 1, 1), date(2021, 4, 30))

Exception ignored in: <function tqdm.__del__ at 0x7f818709d310>
Traceback (most recent call last):
  File "/home/see/anaconda3/envs/env_scraper/lib/python3.8/site-packages/tqdm/std.py", line 1145, in __del__
    self.close()
  File "/home/see/anaconda3/envs/env_scraper/lib/python3.8/site-packages/tqdm/notebook.py", line 283, in close
    self.disp(bar_style='danger', check_delay=False)
AttributeError: 'tqdm_notebook' object has no attribute 'disp'


ImportError: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html