In [1]:
# Import dependencies for streaming tweets
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener

# For saving tweets to database
import sqlite3

# For sentiment analysis
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Additional
import json
from unidecode import unidecode
import time

In [2]:
# Import info for Twitter API
from config import access_token_key, access_token_secret, consumer_key, consumer_secret

In [3]:
# Set up the VADER sentiment analyzer once at module level; the stream
# listener calls polarity_scores() on this same instance for every tweet
sentiment_analyzer = SentimentIntensityAnalyzer()

In [4]:
# Set up database which tweets can be stored in
# Create SQLite database and table
# We will track tweet sentiment, tweet text, and time of tweet
conn = sqlite3.connect('PoliticsPredicted6.db')
c = conn.cursor()

def create_table():
    """Create the TwitterDB table and its three indexes if they are missing.

    Every statement uses IF NOT EXISTS, so the function is idempotent: it can
    be called again against an existing database (e.g. after restarting the
    kernel and re-running the notebook) without raising and without skipping
    any index.

    Errors are printed rather than raised, so a failure here does not stop
    the cell from finishing; nothing is returned.
    """
    try:
        c.execute("CREATE TABLE IF NOT EXISTS TwitterDB(unix REAL, tweet TEXT, sentiment REAL)")
        # Without IF NOT EXISTS the first CREATE INDEX fails on every re-run,
        # which used to abort the remaining statements and the commit.
        c.execute("CREATE INDEX IF NOT EXISTS fast_unix ON TwitterDB(unix)")
        c.execute("CREATE INDEX IF NOT EXISTS fast_tweet ON TwitterDB(tweet)")
        c.execute("CREATE INDEX IF NOT EXISTS fast_sentiment ON TwitterDB(sentiment)")
        conn.commit()
    except Exception as e:
        print(str(e))

create_table()

In [5]:
# Set up a live stream of tweets matching the tracked terms; track and store tweets using this script, run second script to filter
# Use the polarity_scores() method to get the sentiment metrics for a piece of text
# Avoid filtering and tracking dynamically to avoid repeatedly breaking our connection to Twitter
# Include error handling
class listener(StreamListener):
    """Stream listener that scores each incoming tweet and stores it in TwitterDB."""

    def on_data(self, data):
        """Parse one raw stream message, score its text and insert one row.

        The message is decoded from JSON, its 'text' is transliterated with
        unidecode, scored with the module-level sentiment analyzer, and the
        compound score is written to TwitterDB together with the text and the
        message's 'timestamp_ms'.

        Messages that cannot be processed are reported with print() and
        skipped: missing keys (KeyError), malformed JSON (ValueError),
        payloads that are not JSON objects (TypeError) and database failures
        (sqlite3.Error). Handling them here keeps one bad message from
        propagating out of the callback.

        Returns:
            True, always.
        """
        try:
            payload = json.loads(data)

            # NOTE(review): for long tweets 'text' may be a truncated form and
            # the full text may live under another key - confirm against the
            # Twitter streaming payload documentation.
            tweet = unidecode(payload['text'])
            time_ms = payload['timestamp_ms']
            vader_sentiment = sentiment_analyzer.polarity_scores(tweet)
            sentiment = vader_sentiment['compound']

            # Log the tweet's own timestamp (previously the `time` module
            # object was printed here by mistake).
            print(sentiment, tweet, time_ms)
            c.execute('INSERT INTO TwitterDB (unix, sentiment, tweet) VALUES (?, ?, ?)',
                      (time_ms, sentiment, tweet))
            conn.commit()

        except (KeyError, TypeError, ValueError, sqlite3.Error) as e:
            print(str(e))
        return True

    def on_error(self, status):
        """Print the status value passed to this error callback."""
        print(status)

# Track Trump-related phrases and keep reconnecting whenever the stream stops
# Avoid only using 'trump' to avoid overfitting, i.e. ace trump etc.
TRACK_TERMS = ["@realDonaldTrump", "Donald Trump", "President Trump"]
RETRY_DELAY_MIN = 5     # seconds to wait after the first failure
RETRY_DELAY_MAX = 320   # upper bound for the doubled delay
STABLE_AFTER = 60       # a connection that lasted this long resets the delay

# The credentials do not change between reconnects, so build the handler once
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token_key, access_token_secret)

retry_delay = RETRY_DELAY_MIN
while True:
    started = time.monotonic()
    try:
        twitterStream = Stream(auth, listener())
        twitterStream.filter(track=TRACK_TERMS)

    except Exception as e:
        # KeyboardInterrupt is not an Exception subclass, so interrupting the
        # kernel still stops the loop.
        print(str(e))

    # Reached both after a failure and when filter() returns without raising,
    # so the loop never reconnects without waiting first.
    if time.monotonic() - started >= STABLE_AFTER:
        retry_delay = RETRY_DELAY_MIN
    time.sleep(retry_delay)
    # Double the wait after consecutive short-lived attempts, up to the cap
    retry_delay = min(retry_delay * 2, RETRY_DELAY_MAX)

    # TODO: code to truncate or reset database to avoid saved data overload

HTTPSConnectionPool(host='stream.twitter.com', port=443): Max retries exceeded with url: /1.1/statuses/filter.json?delimited=length (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x0000021BDF253E80>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed',))
HTTPSConnectionPool(host='stream.twitter.com', port=443): Max retries exceeded with url: /1.1/statuses/filter.json?delimited=length (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x0000021BE0236F60>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed',))
HTTPSConnectionPool(host='stream.twitter.com', port=443): Max retries exceeded with url: /1.1/statuses/filter.json?delimited=length (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x0000021BE024EEB8>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed',))


KeyboardInterrupt: 