In [None]:
from __future__ import print_function
import sys
from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from pyspark.sql import Row, SparkSession
from collections import namedtuple
import io
import requests

In [None]:
def getSparkSessionInstance(sparkConf):
    """Return the shared SparkSession, building it on first use.

    The session is cached in this module's globals under the key
    'sparkSessionSingletonInstance'. Once that key exists, every call
    returns the cached object and the ``sparkConf`` argument is not used.

    Args:
        sparkConf: configuration passed to ``SparkSession.builder.config``
            when the session is created for the first time.

    Returns:
        The cached session object.
    """
    cache = globals()
    cache_key = 'sparkSessionSingletonInstance'
    if cache_key not in cache:
        session_builder = SparkSession.builder.config(conf=sparkConf)
        cache[cache_key] = session_builder.getOrCreate()
    return cache[cache_key]


# Entry point: reuse the active SparkContext (or create one) and wrap it in a
# StreamingContext. The 5 is StreamingContext's second argument, the batch
# duration (in seconds, per the PySpark API).
# NOTE(review): `sc` and `ssc` are only bound when __name__ == "__main__",
# yet the cells below use `ssc` unconditionally.
if __name__ == "__main__":
    sc = SparkContext.getOrCreate()
    ssc = StreamingContext(sc, 5)

In [None]:
# Open a text stream on the TCP endpoint 192.1.1.1:5299 through the
# streaming context; the host and port are hard-coded here.
socket_stream = ssc.socketTextStream("192.1.1.1", 5299)

In [None]:
# Lightweight record type named 'Tweet' with two positional fields.
fields = ("tag", "count")
Tweet = namedtuple(typename='Tweet', field_names=fields)

In [None]:
# Build a windowed view of the socket stream. The 60 is the argument to
# DStream.window (window duration in seconds, per the PySpark API), so each
# RDD of `lines` covers the most recent window rather than a single batch.
lines = socket_stream.window(60)

In [None]:
# Split every incoming line on the "|||" delimiter, producing one list of
# fields per record (despite the name, these are fields, not words).
# NOTE(review): `process` labels the fields "Tweet" and "RetweetCount", so
# each line presumably carries exactly two fields - confirm with the producer.
words = lines.map(lambda row : row.split("|||"))

In [None]:
# Convert each RDD of the words DStream to a DataFrame, rank it with a SQL
# query, and forward the ranking to the dashboard.
def process(time, rdd):
    """Rank one batch of tweets by retweet count and publish the result.

    The batch is registered as the temporary view "tweets", sorted by
    "RetweetCount" in descending order, shown on stdout, and handed to
    ``send_df_to_dashboard``.

    Args:
        time: batch time supplied by ``foreachRDD``; used only in log output.
        rdd: RDD whose records become rows with the columns "Tweet" and
            "RetweetCount" (presumably two-element lists produced by the
            upstream split - confirm against the caller).

    Returns:
        None. Failures are reported on stderr instead of being raised, so a
        single bad batch does not stop the streaming job.
    """
    print("========= %s =========" % str(time))

    try:
        # createDataFrame infers the schema from the data and fails on an
        # empty RDD, so idle windows are skipped instead of erroring out.
        if rdd.isEmpty():
            return

        header = ["Tweet", "RetweetCount"]
        spark = getSparkSessionInstance(rdd.context.getConf())
        tweets_df = spark.createDataFrame(rdd, header)
        # Cast the count so the ordering below uses the integer cast of the column.
        tweets_df = tweets_df.withColumn(
            "RetweetCount", tweets_df["RetweetCount"].cast("Int"))
        tweets_df.createOrReplaceTempView("tweets")
        trend_tweet_df = spark.sql(
            "select Tweet, RetweetCount from tweets order by RetweetCount desc")
        trend_tweet_df.show()
        send_df_to_dashboard(trend_tweet_df)
    except Exception as exc:
        # Best effort: keep the stream alive, but make the failure visible.
        # KeyboardInterrupt/SystemExit are deliberately not caught here.
        sys.stderr.write("process: batch %s failed: %r\n" % (time, exc))

        

In [None]:
def send_df_to_dashboard(df, url='http://192.1.1.1:5001/updateData', timeout=5):
    """POST the tweets and retweet counts held in ``df`` to the dashboard.

    Args:
        df: DataFrame exposing the columns "Tweet" and "RetweetCount".
        url: endpoint that receives the update; defaults to the address
            that was previously hard-coded.
        timeout: seconds to wait for the dashboard before ``requests`` gives
            up, so an unresponsive dashboard cannot block the caller forever.

    Returns:
        The response object returned by ``requests.post``.

    Raises:
        Whatever ``requests.post`` raises (e.g. connection errors or
        timeouts); nothing is caught here.
    """
    # Collect once: a single Spark job, and labels and counts are guaranteed
    # to come from the same rows in the same order.
    rows = df.select("Tweet", "RetweetCount").collect()
    top_tags = [str(row.Tweet) for row in rows]
    tags_count = [row.RetweetCount for row in rows]
    # Both lists are sent as their Python string representation.
    request_data = {'label': str(top_tags), 'data': str(tags_count)}
    return requests.post(url, data=request_data, timeout=timeout)

In [None]:
# Register `process` as the handler invoked for every RDD of the `words`
# stream.
words.foreachRDD(process)

In [None]:
# Start the streaming computation set up in the cells above.
ssc.start()

In [None]:
# Block this cell until the streaming context terminates.
ssc.awaitTermination()