# Twitter Streaming API  


Modified from https://realpython.com/twitter-bot-python-tweepy/  
and https://www.storybench.org/how-to-collect-tweets-from-the-twitter-streaming-api-using-python/

In [None]:
import csv
import json
import pathlib
import sys

import matplotlib.pyplot as plt
import pandas as pd
import tweepy

with open('config.json') as f:
    config = json.load(f)

    plt.style.use('seaborn-poster')
%matplotlib inline

### Authenticate credentials.  
Note: All of the credentials will be removed after the lesson. You have to generate and use your own.

In [None]:
# Build the authentication handler from the key/secret stored in the config.
auth = tweepy.AppAuthHandler(config["key"], config["secret"])

# A single API client is enough; it is reused by the cells below.
api = tweepy.API(auth)

# print some tweets
for tweet in tweepy.Cursor(api.search, q='heroes del silencio').items(10):
    print(tweet.text)

Create a custom stream listener that saves each tweet to both a CSV file and a JSON file

In [None]:
class MyStreamListener(tweepy.StreamListener):
    """Stream listener that records every received tweet on disk.

    Each status is appended to ``tweets_stream.json`` (under the ``"stream"``
    key) and to ``tweets_stream.csv`` in the current working directory.
    """

    _JSON_PATH = pathlib.Path("tweets_stream.json")
    _CSV_PATH = pathlib.Path("tweets_stream.csv")
    # Column order of the CSV file; the names match the keys of the tweet dict.
    _CSV_HEADER = ("created_at", "user_name", "retweet", "quote", "text", "quote_text")

    def on_status(self, status):
        """Print the tweet id and append the tweet to the JSON and CSV files.

        Args:
            status: Status object delivered by the stream. The code reads its
                ``id_str``, ``created_at``, ``user.screen_name`` and ``text``
                attributes, plus the optional ``extended_tweet``,
                ``retweeted_status`` and ``quoted_status``.
        """
        print(status.id_str)
        is_retweet = hasattr(status, "retweeted_status")  # check if retweet.
        is_quote = hasattr(status, "quoted_status")  # check if this is a quote tweet.

        text = self._full_text(status)
        quoted_text = self._full_text(status.quoted_status) if is_quote else ""

        tweet = dict(
            # Stored as a string: a datetime is not JSON serialisable, and this
            # is the same representation the CSV row uses.
            created_at=str(status.created_at),
            user_name=status.user.screen_name,
            retweet=is_retweet,
            quote=is_quote,
            text=text,
            quote_text=quoted_text,
        )
        self._append_json(tweet)
        self._append_csv(tweet)

    @staticmethod
    def _full_text(status):
        """Return the untruncated text of *status* with newlines replaced by spaces."""
        if hasattr(status, "extended_tweet"):  # check if text has been truncated
            text = status.extended_tweet["full_text"]
        else:
            text = status.text
        # str.replace returns a new string, so the result must be kept.
        return text.replace("\n", " ")

    def _append_json(self, tweet):
        """Append *tweet* to the ``"stream"`` list stored in the JSON file."""
        path = self._JSON_PATH
        if path.is_file():
            with open(path, encoding="utf-8") as f:
                data = json.load(f)
        else:
            data = {"stream": []}
        data["stream"].append(tweet)
        # Rewrite the whole document: dumping after reading in 'r+' mode would
        # append a second JSON document and corrupt the file.
        with open(path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=4)

    def _append_csv(self, tweet):
        """Append *tweet* as one CSV row, writing the header on first use."""
        path = self._CSV_PATH
        write_header = not path.is_file()
        with open(path, "a", encoding="utf-8", newline="") as f:
            # csv.writer quotes fields containing commas or quotes, which are
            # common in tweet text and would otherwise shift the columns.
            writer = csv.writer(f, lineterminator="\n")
            if write_header:
                writer.writerow(self._CSV_HEADER)
            writer.writerow([tweet[column] for column in self._CSV_HEADER])

    def on_error(self, status_code):
        """Report the streaming error code and stop the program."""
        print("Encountered streaming error (", status_code, ")")
        sys.exit()

Retrieve tweets based on a filter

In [None]:
listener = MyStreamListener()
stream = tweepy.Stream(auth=api.auth, listener=listener, tweet_mode='extended')
# NOTE(review): `auth` above is an AppAuthHandler; confirm the streaming
# endpoint accepts it, or switch to user-context credentials.

# filter by keywords
# track = keywords
# locations = bounding box as longitude/latitude pairs, south-west corner
# first: [west_lon, south_lat, east_lon, north_lat]
# NOTE(review): per the Twitter docs, `track` and `locations` are combined
# with OR, not AND -- confirm this is the intended filter.
stream.filter(track=['beer'], languages=['en'], locations=[-0.8, 51.2, 0.5, 51.8]) # London