In [None]:
from __future__ import print_function
import sys
from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from pyspark.sql import Row, SparkSession
import io
import requests
import re
from textblob import TextBlob

In [None]:
def getSparkSessionInstance(sparkConf):
    """Return the shared SparkSession, building it the first time it is requested.

    The session is cached in this module's globals under the key
    'sparkSessionSingletonInstance', so every later call returns the same
    object and ignores ``sparkConf``.

    :param sparkConf: Spark configuration passed to the session builder when
        the session has to be created.
    :return: the cached SparkSession.
    """
    cache = globals()
    if 'sparkSessionSingletonInstance' in cache:
        return cache['sparkSessionSingletonInstance']

    builder = SparkSession.builder.config(conf=sparkConf)
    cache['sparkSessionSingletonInstance'] = builder.getOrCreate()
    return cache['sparkSessionSingletonInstance']


if __name__ == "__main__":
    # Reading <hostname> <port> from sys.argv is disabled; the stream endpoint
    # is hard-coded where the socket stream is opened.
    sc = SparkContext.getOrCreate()
    # Batch interval in seconds. 60 gives more frequent output for reporting;
    # use 3600 to get iPhone sentiments once per hour.
    ssc = StreamingContext(sparkContext=sc, batchDuration=60)

In [None]:
# Open a text stream on the TCP socket that serves the tweets.
socket_stream = ssc.socketTextStream(hostname="192.1.1.1", port=5555)

In [None]:
# Window length in seconds over which received tweets are grouped.
# 60 gives more frequent output for reporting; use 3600 for hourly iPhone sentiments.
lines = socket_stream.window(windowDuration=60)

In [None]:
def clean_tweet(tweet):
    """Strip mentions, URLs and punctuation from a tweet and collapse whitespace.

    Every ``@handle`` (letters, digits and underscores), every ``scheme://...``
    URL and every character that is not an ASCII letter, digit, space or tab is
    replaced by a space; the remaining words are then joined with single spaces.

    :param tweet: raw tweet text; ``None`` or an empty string yields ``''``.
    :return: the cleaned text as a single space-separated string.
    """
    if not tweet:
        return ''
    # Raw string: the pattern contains regex escapes (\w, \S, \t) that must not
    # be interpreted as (invalid) Python string escapes.
    pattern = r"(@[A-Za-z0-9_]+)|([^0-9A-Za-z \t])|(\w+://\S+)"
    return ' '.join(re.sub(pattern, " ", tweet).split())

In [None]:
def get_tweet_sentiment(tweet):
    """Label a tweet as 'positive', 'neutral' or 'negative'.

    The text is first normalised with clean_tweet, then scored with TextBlob;
    the label is chosen from the sign of the resulting polarity.

    :param tweet: raw tweet text.
    :return: 'positive' for polarity above zero, 'neutral' for exactly zero,
        'negative' otherwise.
    """
    polarity = TextBlob(clean_tweet(tweet)).sentiment.polarity
    if polarity > 0:
        return 'positive'
    if polarity == 0:
        return 'neutral'
    return 'negative'

In [None]:
def send_df_to_dashboard(df):
    """POST the sentiment labels and their counts to the dashboard REST endpoint.

    :param df: DataFrame with a ``Sentiment`` column and a ``Count`` column.
    :return: None. Exceptions raised by ``requests.post`` (connection errors,
        timeouts) propagate to the caller.
    """
    # Collect once: two separate collect() calls run the aggregation twice and
    # are not guaranteed to return rows in the same order, which could pair a
    # label with the wrong count.
    rows = df.select("Sentiment", "Count").collect()
    top_tags = [str(row.Sentiment) for row in rows]
    tags_count = [row.Count for row in rows]
    # initialize and send the data through REST API
    url = 'http://192.1.1.1:5001/updateData'
    request_data = {'label': str(top_tags), 'data': str(tags_count)}
    # Bounded wait so an unresponsive dashboard cannot stall the streaming batch.
    response = requests.post(url, data=request_data, timeout=10)
    if not response.ok:
        print("Dashboard update rejected with HTTP %s" % response.status_code,
              file=sys.stderr)

In [None]:
# Split every received chunk on newlines so each stream element is one tweet.
tweety = lines.flatMap(lambda chunk: chunk.split("\n"))

In [None]:
def get_tweets(time, rdd):
    """Score one batch of tweets and push the per-sentiment counts to the dashboard.

    The batch is loaded into a single-column ("Tweet") DataFrame, exposed as
    the temp view "tweets", scored with the get_tweet_sentiment UDF, shown on
    stdout and finally sent through send_df_to_dashboard.

    Processing is best-effort: an ordinary failure is reported on stderr and
    the batch is dropped so the stream keeps running. KeyboardInterrupt and
    SystemExit are not intercepted.

    :param time: batch time handed over by foreachRDD; only used in the banner.
    :param rdd: RDD of tweet strings for this batch.
    :return: None.
    """
    print("========= %s =========" % str(time))
    try:
        # An empty batch has no row to infer a schema from, so createDataFrame
        # would fail; there is nothing to report for it anyway.
        if rdd.isEmpty():
            return
        header = ["Tweet"]
        spark = getSparkSessionInstance(rdd.context.getConf())
        spark.udf.register('get_tweet_sentiment', get_tweet_sentiment)
        # Each element becomes a one-item list, i.e. one row with one column.
        rowRdd = rdd.map(lambda row: row.split("\n"))
        jsonDataFrame = spark.createDataFrame(rowRdd, header)
        jsonDataFrame.createOrReplaceTempView("tweets")
        # 2. Sentiment analysis.
        # 2.1 Score every tweet in the batch.
        # NOTE(review): no "iphone" filter is applied here; presumably the
        # incoming feed is already restricted to iPhone tweets - confirm.
        iPhoneTweetDF = spark.sql(
            "select Tweet, get_tweet_sentiment(Tweet) as Sentiment from tweets")
        iPhoneTweetDF.show()
        # 2.2 Count tweets per sentiment label.
        sentiCountDF = spark.sql(
            "select get_tweet_sentiment(Tweet) as Sentiment, count(get_tweet_sentiment(Tweet)) as Count from tweets group by get_tweet_sentiment(Tweet)")
        sentiCountDF.show()
        send_df_to_dashboard(sentiCountDF)
    except Exception:
        # Keep the stream alive, but make the failure visible instead of
        # silently discarding it.
        import traceback
        traceback.print_exc()

In [None]:
# Run get_tweets on every RDD produced by the tweety stream.
tweety.foreachRDD(get_tweets)

In [None]:
# Start the streaming context so the stream defined in the cells above begins executing.
ssc.start()

In [None]:
# Block this cell until the streaming context terminates.
ssc.awaitTermination()