In [6]:
%%time
# script for scraping tweets
# snscrape needs python >= 3.8
#!pip3 install git+https://github.com/JustAnotherArchivist/snscrape.git

import os
import pandas as pd
import snscrape.modules.twitter as sntwitter

# --- Configuration ---------------------------------------------------------
# Cashtag to search for, without the leading "$" (it is added in the query).
ticker = "AMZN"
since = "2021-01-01"
until = "2021-05-01" #exclusive
folder = "tweets"
# Tweet attributes that are kept in each daily CSV file.
tweet_columns = ["url", "date", "content"]

# The output folder does not depend on the day, so create it once up front
# instead of on every loop iteration.
os.makedirs(folder, exist_ok=True)

# One search (and one CSV file) per calendar day. date_range() includes both
# endpoints, so [:-1] drops `until` itself to make the upper bound exclusive.
for date in pd.date_range(since, until)[:-1]:
    day = date.date()
    next_day = day + pd.Timedelta(days=1)
    search = f'"${ticker}" lang:en since:{day} until:{next_day} -filter:replies'
    print(f"Starting scraping for day {day} ...")
    print("Search query is:", search)
    # get_items() returns a generator; materialise it so that an empty day can
    # be detected explicitly instead of being inferred from a KeyError.
    scraped_tweets = list(sntwitter.TwitterSearchScraper(search).get_items())
    if not scraped_tweets:
        print(f"No tweets for day {day}")
    else:
        all_fields = pd.DataFrame(scraped_tweets)
        # Tweets were returned, so a missing column is a schema mismatch (for
        # example a renamed Tweet attribute), not an empty day. Fail loudly
        # rather than reporting "No tweets" and silently saving nothing.
        missing = [col for col in tweet_columns if col not in all_fields.columns]
        if missing:
            raise KeyError(
                f"Scraped tweets for day {day} lack expected column(s) {missing}; "
                f"available columns are {list(all_fields.columns)}"
            )
        df = all_fields[tweet_columns]
        filename = os.path.join(folder, f"{ticker}_{day}.csv")
        df.to_csv(filename, index=False)
        print(f"Tweets saved to file {filename}")
        print(f"Tweet dataframe shape is {df.shape}")

    print(f"Finished scraping for day {day}")
    

Starting scraping for day 2021-01-01 ...
Search query is: "$AMZN" lang:en since:2021-01-01 until:2021-01-02 -filter:replies
Tweets saved to file tweets/AMZN_2021-01-01.csv
Tweet dataframe shape is (282, 3)
Finished scraping for day 2021-01-01
Starting scraping for day 2021-01-02 ...
Search query is: "$AMZN" lang:en since:2021-01-02 until:2021-01-03 -filter:replies
Tweets saved to file tweets/AMZN_2021-01-02.csv
Tweet dataframe shape is (278, 3)
Finished scraping for day 2021-01-02
Starting scraping for day 2021-01-03 ...
Search query is: "$AMZN" lang:en since:2021-01-03 until:2021-01-04 -filter:replies
Tweets saved to file tweets/AMZN_2021-01-03.csv
Tweet dataframe shape is (311, 3)
Finished scraping for day 2021-01-03
Starting scraping for day 2021-01-04 ...
Search query is: "$AMZN" lang:en since:2021-01-04 until:2021-01-05 -filter:replies
Tweets saved to file tweets/AMZN_2021-01-04.csv
Tweet dataframe shape is (1062, 3)
Finished scraping for day 2021-01-04
Starting scraping for day 2

Tweets saved to file tweets/AMZN_2021-02-04.csv
Tweet dataframe shape is (1039, 3)
Finished scraping for day 2021-02-04
Starting scraping for day 2021-02-05 ...
Search query is: "$AMZN" lang:en since:2021-02-05 until:2021-02-06 -filter:replies
Tweets saved to file tweets/AMZN_2021-02-05.csv
Tweet dataframe shape is (896, 3)
Finished scraping for day 2021-02-05
Starting scraping for day 2021-02-06 ...
Search query is: "$AMZN" lang:en since:2021-02-06 until:2021-02-07 -filter:replies
Tweets saved to file tweets/AMZN_2021-02-06.csv
Tweet dataframe shape is (385, 3)
Finished scraping for day 2021-02-06
Starting scraping for day 2021-02-07 ...
Search query is: "$AMZN" lang:en since:2021-02-07 until:2021-02-08 -filter:replies
Tweets saved to file tweets/AMZN_2021-02-07.csv
Tweet dataframe shape is (395, 3)
Finished scraping for day 2021-02-07
Starting scraping for day 2021-02-08 ...
Search query is: "$AMZN" lang:en since:2021-02-08 until:2021-02-09 -filter:replies
Tweets saved to file tweets

Tweets saved to file tweets/AMZN_2021-03-10.csv
Tweet dataframe shape is (662, 3)
Finished scraping for day 2021-03-10
Starting scraping for day 2021-03-11 ...
Search query is: "$AMZN" lang:en since:2021-03-11 until:2021-03-12 -filter:replies
Tweets saved to file tweets/AMZN_2021-03-11.csv
Tweet dataframe shape is (763, 3)
Finished scraping for day 2021-03-11
Starting scraping for day 2021-03-12 ...
Search query is: "$AMZN" lang:en since:2021-03-12 until:2021-03-13 -filter:replies
Tweets saved to file tweets/AMZN_2021-03-12.csv
Tweet dataframe shape is (641, 3)
Finished scraping for day 2021-03-12
Starting scraping for day 2021-03-13 ...
Search query is: "$AMZN" lang:en since:2021-03-13 until:2021-03-14 -filter:replies
Tweets saved to file tweets/AMZN_2021-03-13.csv
Tweet dataframe shape is (274, 3)
Finished scraping for day 2021-03-13
Starting scraping for day 2021-03-14 ...
Search query is: "$AMZN" lang:en since:2021-03-14 until:2021-03-15 -filter:replies
Tweets saved to file tweets/

Tweets saved to file tweets/AMZN_2021-04-13.csv
Tweet dataframe shape is (919, 3)
Finished scraping for day 2021-04-13
Starting scraping for day 2021-04-14 ...
Search query is: "$AMZN" lang:en since:2021-04-14 until:2021-04-15 -filter:replies
Tweets saved to file tweets/AMZN_2021-04-14.csv
Tweet dataframe shape is (832, 3)
Finished scraping for day 2021-04-14
Starting scraping for day 2021-04-15 ...
Search query is: "$AMZN" lang:en since:2021-04-15 until:2021-04-16 -filter:replies
Tweets saved to file tweets/AMZN_2021-04-15.csv
Tweet dataframe shape is (862, 3)
Finished scraping for day 2021-04-15
Starting scraping for day 2021-04-16 ...
Search query is: "$AMZN" lang:en since:2021-04-16 until:2021-04-17 -filter:replies
Tweets saved to file tweets/AMZN_2021-04-16.csv
Tweet dataframe shape is (872, 3)
Finished scraping for day 2021-04-16
Starting scraping for day 2021-04-17 ...
Search query is: "$AMZN" lang:en since:2021-04-17 until:2021-04-18 -filter:replies
Tweets saved to file tweets/