# 1. Extracting tweets

Daniel Ruiz, MSc in Data Science and Business Analytics (DSBA), Bocconi University

Reference code and documentation (in alphabetical order):
- https://chatbotslife.com/twitter-data-mining-a-guide-to-big-data-analytics-using-python-4efc8ccfa219
- https://developer.twitter.com/en/docs
- http://docs.tweepy.org/en/v3.8.0/cursor_tutorial.html
- https://gist.github.com/vickyqian/f70e9ab3910c7c290d9d715491cde44c
- https://towardsdatascience.com/creating-the-twitter-sentiment-analysis-program-in-python-with-naive-bayes-classification-672e5589a7ed
- https://towardsdatascience.com/extracting-twitter-data-pre-processing-and-sentiment-analysis-using-python-3-0-7192bd8b47cf
- https://towardsdatascience.com/tweepy-for-beginners-24baf21f2c25

## 1.1. Activating the API

In [1]:
# import packages

import csv
import json
import os
import time

import tweepy

In [2]:
# 'au.txt' holds a JSON list with, in order:
# 0 consumer key, 1 consumer secret, 2 access token, 3 access token secret

# Read the credentials inside a `with` block so the file handle is closed
# as soon as the JSON has been parsed.
with open('au.txt') as credentials_file:
    auths = json.load(credentials_file)

auth = tweepy.OAuthHandler(auths[0], auths[1])
auth.set_access_token(auths[2], auths[3])
api = tweepy.API(auth)

# REST API: tweets sent before the query started (i.e. historical)
# Streaming API: tweets sent after the query started (i.e. real-time)

## 1.2. Running a query

## 1.3. Creating a function

In [3]:
def _tweet_row(tweet):
    """Return the CSV row for one tweet: text, id, creation time, author handle, follower count."""
    # Other attributes that could be exported include:
    # tweet.retweet_count, .favorite_count, .entities
    # tweet.user.description, .location
    return [tweet.full_text.replace(",", " "),  # commas are stripped from the text, as before
            tweet.id_str,
            tweet.created_at,
            tweet.user.screen_name,
            tweet.user.followers_count]


def write_tweets(api, sup, start_date='2020-01-01', end_date='2020-01-14'):
    """Search for tweets and append them to ``Dataset_Twitter/<sup[0]>.csv``.

    Parameters
    ----------
    api : object exposing a ``search`` method (e.g. ``tweepy.API``)
        Passed to ``tweepy.Cursor`` to page through the search results.
    sup : sequence of two str
        ``sup[0]`` is the output file stem, ``sup[1]`` is the search query.
    start_date : str
        'YYYY-MM-DD' (inclusive), forwarded to the search as ``since``.
    end_date : str
        'YYYY-MM-DD' (exclusive), forwarded to the search as ``until``.

    Each tweet becomes one CSV row (see ``_tweet_row``). Rows are appended,
    so calling the function again for the same period duplicates rows.
    No file is created when the search returns no tweets.
    """
    tweets = tweepy.Cursor(api.search,
                           q=sup[1],
                           tweet_mode='extended',
                           since=start_date,
                           until=end_date,
                           wait_on_rate_limit=True).items()

    # Display
    print('searching:', sup[1])
    # os.path.join keeps the path valid on every OS (the result is unchanged on Windows).
    filename = os.path.join('Dataset_Twitter', sup[0] + '.csv')
    print('folder:', filename)

    # Pull the first tweet before touching the file system so that an empty
    # result set leaves no empty CSV behind.
    tweet_iter = iter(tweets)
    first_tweet = next(tweet_iter, None)
    if first_tweet is None:
        return

    os.makedirs(os.path.dirname(filename), exist_ok=True)

    # Open the file once for the whole run instead of once per tweet.
    # newline='' is required by the csv module; without it Windows output
    # gets an extra carriage return per row (blank lines between records).
    with open(filename, 'a', encoding="utf-8", newline='') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(_tweet_row(first_tweet))
        for tweet in tweet_iter:
            csv_writer.writerow(_tweet_row(tweet))
    

## 1.4. Running the function

In [4]:
# Each entry is [output file stem, Twitter search query]; the trailing
# comment is the entry's index in the list (20 entries, indices 0-19).
my_companies = [['br_embraer','@EMBRAER OR #EMBRAER'], #0
                ['br_americanas','@LOJASAMERICANAS OR #LOJASAMERICANAS'], #1
                ['br_pontofrio','@PONTOFRIO OR #PONTOFRIO'], #2
                ['br_petrobras','@PETROBRAS OR #PETROBRAS'], #3
                ['br_bradesco','@BRADESCO OR #BRADESCO'], #4
                ['br_renner','@LOJAS_RENNER OR #RENNER'], #5
                ['br_gol','@VOEGOLOFICIAL OR #VOEGOL'], #6
                ['br_magazineluiza','@MAGAZINELUIZA OR #MAGALU'], #7
                ['br_itau','@ITAU OR #ITAU'], #8
                ['br_valor','@VALORECONOMICO OR #VALORECONOMICO'], #9
                ['us_abercrombie','@ABERCROMBIE OR #ABERCROMBIE'], #10
                ['us_boeing','@BOEING OR #BOEING'], #11
                ['us_beyondmeat','@BEYONDMEAT OR #BEYONDMEAT'], #12
                ['us_morganstanley','@MORGANSTANLEY OR #MORGANSTANLEY'], #13
                ['us_jpmorgan', '@JPMORGAN OR #JPMORGAN'], #14
                ['us_exxonmobil','@EXXONMOBIL OR #EXXON'], #15
                ['us_americanair','@AMERICANAIR OR #AMERICANAIRLINES'], #16
                ['us_cocacola','@COCACOLA OR #COCACOLA'], #17
                ['us_tesla','@TESLA OR #TESLA'], #18
                ['us_wsj','@WSJ OR #WSJ']] #19

In [6]:
# Index of the first company to process; raise it to resume an interrupted run.
start_index = 0

# Slice to the end of the list rather than to a hard-coded 19: the list has
# 20 entries, so the fixed upper bound silently skipped the last company.
for i, company in enumerate(my_companies[start_index:], start=start_index):
    print(i, company)
    write_tweets(api, sup=company, start_date='2020-04-24', end_date='2020-04-25')
    print('--------------')

print('done!')

0 ['br_embraer', '@EMBRAER OR #EMBRAER']
searching: @EMBRAER OR #EMBRAER
folder: Dataset_Twitter\br_embraer.csv
--------------
1 ['br_americanas', '@LOJASAMERICANAS OR #LOJASAMERICANAS']
searching: @LOJASAMERICANAS OR #LOJASAMERICANAS
folder: Dataset_Twitter\br_americanas.csv
--------------
2 ['br_pontofrio', '@PONTOFRIO OR #PONTOFRIO']
searching: @PONTOFRIO OR #PONTOFRIO
folder: Dataset_Twitter\br_pontofrio.csv
--------------
3 ['br_petrobras', '@PETROBRAS OR #PETROBRAS']
searching: @PETROBRAS OR #PETROBRAS
folder: Dataset_Twitter\br_petrobras.csv
--------------
4 ['br_bradesco', '@BRADESCO OR #BRADESCO']
searching: @BRADESCO OR #BRADESCO
folder: Dataset_Twitter\br_bradesco.csv
--------------
5 ['br_renner', '@LOJAS_RENNER OR #RENNER']
searching: @LOJAS_RENNER OR #RENNER
folder: Dataset_Twitter\br_renner.csv
--------------
6 ['br_gol', '@VOEGOLOFICIAL OR #VOEGOL']
searching: @VOEGOLOFICIAL OR #VOEGOL
folder: Dataset_Twitter\br_gol.csv
--------------
7 ['br_magazineluiza', '@MAGAZINELU