In [None]:
#helpful guide https://www.dataquest.io/blog/streaming-data-python/

#If you haven't already done so, visit the Twitter Developer Center and create a developer account. This account will enable us to create credentials that let us authenticate with the Twitter Streaming API.
#Once you create your application on Twitter, you can click on the "Keys and Access Tokens" tab to get your credentials. You'll need to get the following from under "Application Settings":

#Consumer Key (API Key) — we'll refer to this as TWITTER_APP_KEY.
twitter_app_key = ' '
#Consumer Secret (API Secret) — we'll refer to this as TWITTER_APP_SECRET.
twitter_app_secret = ' '

#You'll also need to do the following in the section under "Your Access Token":
#Click "Create my access token"
#Get the value called Access Token — we'll refer to this as TWITTER_KEY.
twitter_key = ' '
#Get the value called Access Token Secret — we'll refer to this as TWITTER_SECRET.
twitter_secret = ' '


#IMPORT NECESSARY PACKAGES#
###########################

#There are a variety of clients for the Twitter Streaming API across all major programming languages. For Python, there are quite a few, which you can find here. The most popular is tweepy, which allows you to connect to the streaming API and handle errors properly.
import tweepy #note you may need to install tweepy first by going to CMD and running pip install tweepy

#Once we have all the data we want on each tweet, we're ready to store it for later processing. It's possible to store our data in a csv file, but a csv file makes it hard to query the data. If we want to read from a csv file, we either have to load the whole thing, or go through a convoluted process to query and only load the pieces we want. A good place to store our data is in a database. Because they are commonly used and easy to query, we'll use a relational database. SQLite is the simplest to use major relational database, as it doesn't require any processes to be running, and everything is stored in a single file. In order to access the database, we'll use the dataset package, which makes it extremely simple to access a database and store data. Instead of creating a database and tables, we simply store data, and the dataset package will take care of automatically creating the database and all the tables we need. 
import dataset

#In order to perform sentiment analysis, we can use a library called TextBlob, which allows us to do sentiment analysis in Python, among other natural language processing tasks.
from textblob import TextBlob

############################

#We have to connect to our database using a connection string:
db = dataset.connect("sqlite:///tweets.db") #This will create a database called "tweets.db" in the same folder as this program

#We can setup tweepy to authenticate with Twitter with the following code:
auth = tweepy.OAuthHandler(twitter_app_key, twitter_app_secret)
auth.set_access_token(twitter_key, twitter_secret )

#Then, we can create an API object to pull data from Twitter — we'll pass in the authentication:
api = tweepy.API(auth)

#As we noted above, opening a Twitter stream using tweepy requires a user-defined listener class. We'll need to subclass the StreamListener class, and implement some custom logic. The StreamListener class has a method called on_data. This method will automatically figure out what kind of data Twitter sent, and call an appropriate method to deal with the specific data type. It's possible to deal with events like users sending direct messages, tweets being deleted, and more. For now, we only care about when users post tweets. Thus, we'll need to override the on_status method:
class StreamListener(tweepy.StreamListener):
     
    def on_status(self, status):
        #Modify the on_status function to filter out retweets. If the retweeted_status property exists, then don't process the tweet. 
        if hasattr(status, 'retweeted_status'): #if status.retweeted_status (as shown in the dataquest guide) didn't work so use this instead https://stackoverflow.com/questions/27095950/tweepy-finding-the-original-author-of-a-retweet
            return
        else: #do the following for all tweets that aren't retweets
            print("New Tweet:")
            
            #print text of tweet
            print(status.text)
            
            #Initialize the TextBlob class on the text of the tweet.
            blob = TextBlob(status.text)
            
            #Get the sentiment score from the class.
            sent = blob.sentiment
            
            #we have already created a database called "tweets" using the dataset package we imported earlier
            db["tweets"].insert(dict( #insert the following data for each tweet into the tweets database
                text=status.text, #creates a column called 'text' and inserts text of tweet
                polarity=sent.polarity, #creates a a column called 'polarity' and inserts polarity of the tweet.  polarity is the negativity or positivity of the tweet, on a -1 to 1 scale.
                subjectivity=sent.subjectivity, #create a column called 'subjectivity' and inserts subjectivity of the tweet.  subjectivity is how objective or subjective the tweet is. 0 means that the tweet is very objective, and 1 means that it is very subjective.
            ))

    #We'll also need to override the on_error method of StreamListener so that we can handle errors coming from the Twitter API properly. The Twitter API will send a 420 status code if we're being rate limited. If this happens, we'll want to disconnect. If it's any other error, we'll keep going:
    def on_error(self, status_code):
        if status_code == 420:
            return False
        
#Create an instance of our StreamListener class.
stream_listener = StreamListener()

#Create an instance of the tweepy Stream class, which will stream the tweets.
#We pass in our authentication credentials (api.auth) so that Twitter allows us to connect.
#We pass in our stream_listener so that our callback functions are called.
stream = tweepy.Stream(auth=api.auth, listener=stream_listener)

#Start streaming tweets by calling the filter method. This will start streaming tweets from the filter.json API endpoint, and passing them to our listener callback.
#We pass in a list of terms to filter on, as the API requires.
stream.filter(languages=['en'],track=["Tesla"])



In [4]:
#import dataset
import sqlite3
conn = sqlite3.connect('tweets.db')
c = conn.cursor()
for row in c.execute('SELECT * FROM tweets'):
        print row

(1, -0.65, u'Every time you idiots come out with something like this you are proven wrong by his quarterly earnings. Just stop i\u2026 https://t.co/ad3bJcgzxd', 0.8500000000000001)
(2, 0.0, u"Tesla's Fundraising Options Get Thornier https://t.co/BrUu6MyIna", 0.0)
(3, 0.2, u'Tesla with Autopilot slams into truck stopped at red light - KLAS-TV https://t.co/sHd9VPZRxx via @meetinnovation #tesla', 0.35)
(4, 0.0, u'US had total of 180,000 vehicle burning accidents per year out of 280 million ICE vehicles on the road= 0.064%. Tes\u2026 https://t.co/TMW4tOXLBE', 0.75)
(5, 0.0, u'Tesla Model 3 delays may stretch on as engineering lead takes a break https://t.co/viJS98ZutC', 0.0)
(6, 0.0, u'Police probe whether Autopilot feature was on in #Tesla crash - Washington Post https://t.co/AuEDnjMeU2', 0.0)
(7, 0.0, u'Tesla Model 3 delays may stretch on as engineering lead takes a break https://t.co/EAmGvvGl4D', 0.0)
(8, 0.5, u'As the crowds in the streets arrive here I have to remember tomorrow I migh