In [15]:
#Imports
import json
import os
import re
import time
import urllib.request

import boto3
import numpy as np
import pandas as pd
import redis
import schedule

#Twitter requirements
from tweepy import API
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy.streaming import StreamListener

kinesis = boto3.client('kinesis', region_name='us-east-1')


#Connect to Redis-DataStore
REDIS = redis.Redis(host='data_store')

In [13]:
#Get Environment Variables
ACCESS_TOKEN = %env ACCESS_TOKEN
ACCESS_TOKEN_SECRET = %env ACCESS_TOKEN_SECRET
CONSUMER_KEY = %env CONSUMER_KEY
CONSUMER_SECRET = %env CONSUMER_SECRET

#### Build Company DataFrame

In [7]:
companies = json.loads(REDIS.get('companies').decode())
company_df = pd.DataFrame.from_dict(companies, orient='index')
company_df.index.name = 'Ticker'
company_df.columns=['Company']
#Add code to add ticker symbol
company_df['tweet_ticker']=company_df.index.map(lambda x: '$'+x)

company_df.head()

Ticker,Company,tweet_ticker
AAPL,Apple,$AAPL
FB,Facebook,$FB
GOOG,Google,$GOOG


In [9]:
tickers = company_df['tweet_ticker'].tolist()

In [20]:
#Want to create a simple list, but how do I handle the users sub?
attributes = ['created_at',
             'id_str',
             'text',
              'quote_count',
              'reply_count',
              'retweet_count',
              'favorite_count',
              'retweeted',
              'lang',
              ['user','name'],
              ['user','followers_count'],
              ['user','statuses_count'],
              ['user','screen_name'],
              ['entities','hashtags'],
              ['entities','symbols']
               ]
def filter_attr(data):
    output = {}
    #Choose filter attributes
    for element in attributes:
        if isinstance(element, str):
            output[element]=data[element]
        #Handle Nested Attributes
        else:
            string = str(element[0])+'_'+str(element[1])
            output[string]=data[element[0]][element[1]]
  
    #Need to also add the company name to output dictionary.
    #Add all companies tweet applies to in list
    attached_company = []
    
    for company in tickers:
        if data['text'].find(company) > -1:
            attached_company.append(company[1:])
            
    
    output['Company']=attached_company             
    
    return output

#This is a basic listener that just prints received tweets to stdout.
class TweetListener(StreamListener):
    
    def on_data(self, data):
        try:
            datajson = json.loads(data)
            filtered = filter_attr(datajson)
            #Check to see if a valid tweet
            if filtered['Company'] and filtered['lang']=='en':
                
                print(filtered)
                #Add counter to count stocks. 
                REDIS.incr('Twitter_Stock_Count')
                #---------- Insert to Kinesis Stream --------------
                response = kinesis.put_record(StreamName="Twitter_Stream", Data=json.dumps(filtered), PartitionKey="partitionkey")
                return True
            
        except Exception as e:
            print(e)

    def on_error(self, status):
        print('The error code is: ' + repr(status))
        #Continue even if there is an error
        return True



In [None]:
REDIS.set('Twitter_Stock_Count', 0)

#This handles Twitter authetication and the connection to Twitter Streaming API
tweetlist = TweetListener(api=API(wait_on_rate_limit=True,wait_on_rate_limit_notify=True))
auth = OAuthHandler(CONSUMER_KEY,CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN,ACCESS_TOKEN_SECRET)
stream = Stream(auth, tweetlist)

#Filters by the ticker names
print('Filtering: ' + str(tickers))
stream.filter(track=tickers)