In [None]:
import time
import http.client
import tweepy
import nltk
import config
from kafka import KafkaProducer
from json import dumps
from afinn import Afinn

# Module-level AFINN sentiment scorer; sentimentValue() calls afinn.score() on it.
# NOTE(review): emoticons=True presumably makes the scorer take emoticons into
# account in addition to words -- confirm against the afinn package docs.
afinn = Afinn(emoticons=True)


# Uses Sentiment score to classify as negative/neutral/positive
def sentimentValue(text):
    """Score ``text`` with the module-level ``afinn`` scorer and label its polarity.

    Args:
        text: The string handed to ``afinn.score``.

    Returns:
        A ``(score, label)`` tuple. ``label`` is ``'negative'`` when the score
        is below zero, ``'neutral'`` when it equals zero, and ``'positive'``
        in every other case.
    """
    score = afinn.score(text)

    # Comparison order is significant: anything that is neither < 0 nor == 0
    # falls through to 'positive'.
    if score < 0:
        label = 'negative'
    elif score == 0:
        label = 'neutral'
    else:
        label = 'positive'

    return (score, label)
    
# Serializer handed to the Kafka producer: JSON-encode the value, then
# convert the resulting string to UTF-8 bytes.
def _json_utf8(value):
    return dumps(value).encode('utf-8')


# Register with the Kafka cluster at kafka:9092 as a producer.
producer = KafkaProducer(
    bootstrap_servers=['kafka:9092'],
    value_serializer=_json_utf8,
)


In [None]:
# Build the OAuth handler from the credentials kept in the local config module
# (api_key, api_secret, access_token, token_secret).
auth = tweepy.OAuthHandler(config.api_key, config.api_secret)
auth.set_access_token(config.access_token, config.token_secret)


# Connect to tweepy API.
# The polling loops in this notebook issue requests right at the endpoints'
# rate limits and never catch errors, so a single rate-limit response would
# raise and stop the endless producer loop. wait_on_rate_limit=True tells
# tweepy to sleep until the limit window resets instead of raising.
api = tweepy.API(auth, wait_on_rate_limit=True)


# Highest tweet id published so far; 0 means nothing has been fetched yet.
maxId=0

# Poll the home timeline on a 60-second cadence measured from starttime.
# home_timeline endpoint rate limited to 15 requests every 15 minutes
starttime=time.time()
while True:
    print('Getting Tweets at ',time.time())

    # The first request has no lower bound; later ones only ask for tweets
    # newer than the highest id already seen.
    timeline_args = {'count': 200}
    if maxId != 0:
        timeline_args['since_id'] = maxId
    public_tweets = api.home_timeline(**timeline_args)

    for tweet in public_tweets:
        # Track the newest id so the next poll can skip what was already sent.
        maxId = max(maxId, tweet.id)

        sentiment = sentimentValue(tweet.text)
        score, label = sentiment
        payload = {
            'id': tweet.id,
            'text': tweet.text,
            'created_at': str(tweet.created_at),
            'sentimentScore': score,
            'sentiment': label,
        }

        producer.send('incoming_tweets', payload)

    # Pause until the next 60-second tick counted from starttime, so the
    # cadence stays fixed regardless of how long this iteration took.
    elapsed = time.time() - starttime
    time.sleep(60.0 - (elapsed % 60.0))

In [None]:
# Highest tweet id published so far; 0 means nothing has been fetched yet.
maxId=0

# What keyword to use in the twitter search function
keyword = 'cheese'

# tweepy 4.0 renamed API.search to API.search_tweets. Resolve whichever method
# this installation provides so the cell runs on both old and new releases
# (api.search is only touched when search_tweets is missing).
search_tweets = getattr(api, 'search_tweets', None) or api.search

# Gather data from tweepy every 5 seconds from the start time
# search endpoint rate limited to 180 requests every 15 minutes
starttime=time.time()
while True:
    print('Getting Tweets at ',time.time())

    # The first request has no lower bound; later ones only ask for tweets
    # newer than the highest id already seen.
    search_args = {'q': keyword, 'result_type': 'mixed', 'count': 100}
    if maxId != 0:
        search_args['since_id'] = maxId
    public_tweets = search_tweets(**search_args)

    for tweet in public_tweets:
        # Track the newest id so the next poll can skip what was already sent.
        maxId = max(maxId, tweet.id)

        sentiment = sentimentValue(tweet.text)
        payload = {
            'id': tweet.id,
            'text': tweet.text,
            'created_at': str(tweet.created_at),
            'sentimentScore': sentiment[0],
            'sentiment': sentiment[1],
        }

        producer.send('incoming_tweets', payload)

    # Pause until the next 5-second tick counted from starttime. This keeps a
    # fixed cadence; the pause is at most 5 seconds and shrinks by however
    # long the iteration above took.
    time.sleep(5.0 - ((time.time() - starttime) % 5.0))

