# Summary

This notebook demonstrates how to use the `snscrape` social-media scraper to retrieve tweets from multiple accounts efficiently, by running the per-account queries in parallel.

As an example, we will retrieve recent tweets from well-known futurists.

In [3]:
# install the social-media scraper first: !pip3 install snscrape
import snscrape.modules.twitter as sntwitter
import itertools
import multiprocessing as mp
import datetime
import pandas as pd
import logging

# configure logging
logging.basicConfig()
logger = logging.getLogger('scrapping')
logger.setLevel(logging.INFO)


# twits ranges
start_date = datetime.datetime(2021,1,1,tzinfo=datetime.timezone.utc)
attributes = ('date','url','rawContent')


def get_tweets(username,n_tweets=None,attributes=attributes):
    tweets = sntwitter.TwitterSearchScraper(f'from:{username}').get_items() # invoke the scrapper
    tweets = itertools.islice(tweets,n_tweets) # stopped when the count reached
    tweets = itertools.takewhile(lambda t:t.date>=start_date, tweets) # stop when date passed
    tweets = map(lambda t: (username,)+tuple(getattr(t,a) for a in attributes),tweets) # keep only attributes needed
    pd.DataFrame(tweets).to_csv(f'../data/futurists_tweets/{username}.csv')
    logger.info(username)
    return tweets


# a list of accounts to scrape
user_names = pd.read_csv('../data/futurists.csv')['Twitter'].to_list()

# parallelise queries for speed ! 
with mp.Pool(4) as p:
    results = p.imap(get_tweets, user_names)
    p.close()
    p.join()

INFO:scrapping:kevin2kelly
INFO:scrapping:michiokaku
INFO:scrapping:michellzappa
INFO:scrapping:PeterDiamandis
INFO:scrapping:janlgordon
INFO:scrapping:rossdawson
INFO:scrapping:GeniusWorks
INFO:scrapping:briansolis
INFO:scrapping:Richard_Florida
INFO:scrapping:RetailProphet
INFO:scrapping:avantgame
INFO:scrapping:gleonhard
INFO:scrapping:ayeletb
INFO:scrapping:mgorbis
INFO:scrapping:zephoria
INFO:scrapping:rushkoff
INFO:scrapping:futuristufuk
INFO:scrapping:fhioxford
INFO:scrapping:KurzweilAINews
INFO:scrapping:Joi
INFO:scrapping:ramez
INFO:scrapping:aubreydegrey
INFO:scrapping:jhagel
INFO:scrapping:GreatDismal
INFO:scrapping:GlenHiemstra
INFO:scrapping:alphanmanas
INFO:scrapping:FaithPopcorn
INFO:scrapping:nd_kane
INFO:scrapping:ThomasFrey
INFO:scrapping:frankdiana
INFO:scrapping:rodfalcon
INFO:scrapping:DavidSmithGFF
INFO:scrapping:jesse
INFO:scrapping:DanielBurrus
INFO:scrapping:jcmeister
INFO:scrapping:cshirky
INFO:scrapping:Anabjain
INFO:scrapping:dunagan23
INFO:scrapping:webmast

In [21]:
import pandas as pd

# Widen text columns (up to 800 characters) so long tweets are not truncated on display.
pd.set_option('max_colwidth', 800)

# One column for the account name, followed by one column per scraped attribute;
# every element of `results` must therefore be a row with that many fields.
column_names = ('user', *attributes)
df = pd.DataFrame(results, columns=column_names)


ValueError: DataFrame constructor not properly called!