In [1]:
from pyspark import SparkContext,SparkConf
from pyspark.sql import SparkSession 
from pyspark.streaming import StreamingContext
from pyspark.streaming.kafka import KafkaUtils
from pyspark.sql import Row,SQLContext
import sys
import json

In [2]:
ss = SparkSession.Builder() \
     .appName("Realtime") \
     .config("spark.jars", "./spark-streaming-kafka-0-8-assembly_2.11-2.0.0-preview.jar") \
     .getOrCreate()
sc = ss.sparkContext
ssc = StreamingContext(sc, 1)
ssc.checkpoint("checkpoint_TwitterApp")
ss.sparkContext.setLogLevel('WARN')

21/12/31 03:46:30 WARN Utils: Your hostname, sky-ubuntu resolves to a loopback address: 127.0.0.1; using 192.168.1.13 instead (on interface enp2s0)
21/12/31 03:46:30 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
21/12/31 03:46:41 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


In [3]:
broker = '127.0.0.1:9092'
topic='tweet'

In [4]:
ks = KafkaUtils.createDirectStream(ssc, [topic], kafkaParams={"metadata.broker.list":broker})

In [5]:
def get_sql_context_instance(spark_context):
    if ('sqlContextSingletonInstance' not in globals()):
        globals()['sqlContextSingletonInstance'] = SQLContext(spark_context)
    return globals()['sqlContextSingletonInstance']
def process_rdd(time, rdd):
    print("----------- %s -----------" % str(time))
    try:
        # Get spark sql singleton context from the current context
        sql_context = get_sql_context_instance(rdd.context)
        # convert the RDD to Row RDD
        row_rdd = rdd.map(lambda w: Row(hashtag=w[0], hashtag_count=w[1]))
        # create a DF from the Row RDD
        hashtags_df = sql_context.createDataFrame(row_rdd)
        # Register the dataframe as table
        hashtags_df.registerTempTable("hashtags")
        # get the top 10 hashtags from the table using SQL and print them
        hashtag_counts_df = sql_context.sql("select hashtag, hashtag_count from hashtags order by hashtag_count desc limit 10")
        hashtag_counts_df.show()
        # call this method to prepare top 10 hashtags DF and send them
#         send_df_to_dashboard(hashtag_counts_df)
    except:
        e = sys.exc_info()[0]
        print("Error: %s" % e)

In [6]:
def aggregate_tags_count(new_values, total_sum):
    return sum(new_values) + (total_sum or 0)

In [7]:
def filter(x):
    if x[0] == '#':
        if len(x) != 1:
            return True
    return False

In [None]:
lines = ks.map(lambda x: json.loads(x[1])['text'] )
lines = lines.window(30,10)
words = lines.flatMap(lambda line: line.split())
hashtags = words.filter(lambda w: filter(w)).map(lambda x: (x, 1))
tags_totals = hashtags.updateStateByKey(aggregate_tags_count)
tags_totals.foreachRDD(process_rdd)
ssc.start()
ssc.awaitTermination()

----------- 2021-12-31 03:46:53 -----------


                                                                                

+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
|       #TraxxasXMaxx|            1|
|#FastestNameInRad...|            1|
|    #TraxxasFanPhoto|            1|
|                 #RC|            1|
|            #Traxxas|            1|
+--------------------+-------------+

----------- 2021-12-31 03:47:03 -----------
+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
|       #TraxxasXMaxx|            2|
|#FastestNameInRad...|            2|
|    #TraxxasFanPhoto|            2|
|                 #RC|            2|
|            #Traxxas|            2|
|               #현진|            1|
|      #sugarskullrug|            1|
|#skullsteelingwhe...|            1|
|           #skullmat|            1|
|#dayofthedeadcari...|            1|
+--------------------+-------------+

----------- 2021-12-31 03:47:13 -----------
+--------------------+-------------+
|             hashtag|ha

----------- 2021-12-31 03:49:33 -----------
+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
|         #의령출장샵|            9|
| #너의_이모지가_들려|            9|
|     #의령출장아가씨|            6|
|                  #!|            6|
|              #MONEY|            6|
|               #Lisa|            6|
|                 #16|            6|
|           #의령콜걸|            6|
|        #BLACKPINK's|            6|
|#BillboardHotTren...|            6|
+--------------------+-------------+

----------- 2021-12-31 03:49:43 -----------
+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
|         #의령출장샵|            9|
| #너의_이모지가_들려|            9|
|     #의령출장아가씨|            6|
|                  #!|            6|
|              #MONEY|            6|
|               #Lisa|            6|
|                 #16|            6|
|           #의령콜걸|            6|
|        #BLACKPINK's|            

+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
| #너의_이모지가_들려|           18|
|         #의령출장샵|            9|
|        #ranboorecap|            9|
|               #Hokk|            9|
|              #MONEY|            9|
|               #Lisa|            9|
|                 #16|            9|
|        #BLACKPINK's|            9|
|#BillboardHotTren...|            9|
|             #JENNIE|            6|
+--------------------+-------------+

----------- 2021-12-31 03:52:13 -----------
+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
| #너의_이모지가_들려|           18|
|              #MONEY|           10|
|               #Lisa|           10|
|                 #16|           10|
|        #BLACKPINK's|           10|
|#BillboardHotTren...|           10|
|         #의령출장샵|            9|
|        #ranboorecap|            9|
|               #Hokk|            9|
|             #JENN

----------- 2021-12-31 03:54:33 -----------
+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
| #너의_이모지가_들려|           27|
|              #MONEY|           21|
|               #Lisa|           21|
|                 #16|           21|
|        #BLACKPINK's|           21|
|#BillboardHotTren...|           21|
|                #BSC|           15|
|               #현진|           12|
|            #Hyunjin|           12|
|     #TubaBüyüküstün|           10|
+--------------------+-------------+

----------- 2021-12-31 03:54:43 -----------
+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
| #너의_이모지가_들려|           27|
|              #MONEY|           21|
|               #Lisa|           21|
|                 #16|           21|
|        #BLACKPINK's|           21|
|#BillboardHotTren...|           21|
|                #BSC|           17|
|               #현진|           12|
|     

----------- 2021-12-31 03:57:03 -----------
+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
|              #MONEY|           27|
| #너의_이모지가_들려|           27|
|               #Lisa|           27|
|                 #16|           27|
|        #BLACKPINK's|           27|
|#BillboardHotTren...|           27|
|                #BSC|           21|
|     #TubaBüyüküstün|           15|
|       #EnginAkyürek|           15|
|             #MavSan|           15|
+--------------------+-------------+

----------- 2021-12-31 03:57:13 -----------
+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
|              #MONEY|           27|
| #너의_이모지가_들려|           27|
|               #Lisa|           27|
|                 #16|           27|
|        #BLACKPINK's|           27|
|#BillboardHotTren...|           27|
|                #BSC|           21|
|     #TubaBüyüküstün|           15|
| 

----------- 2021-12-31 03:59:33 -----------
+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
| #너의_이모지가_들려|           36|
|              #MONEY|           34|
|                 #16|           34|
|               #Lisa|           34|
|        #BLACKPINK's|           34|
|#BillboardHotTren...|           34|
|                #BSC|           25|
|               #현진|           21|
|            #Hyunjin|           21|
|     #TubaBüyüküstün|           15|
+--------------------+-------------+

----------- 2021-12-31 03:59:43 -----------
+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
| #너의_이모지가_들려|           37|
|              #MONEY|           36|
|                 #16|           36|
|               #Lisa|           36|
|        #BLACKPINK's|           36|
|#BillboardHotTren...|           36|
|                #BSC|           26|
|               #현진|           22|
|     

----------- 2021-12-31 04:02:03 -----------
+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
|              #MONEY|           45|
|               #Lisa|           45|
|                 #16|           45|
|        #BLACKPINK's|           45|
|#BillboardHotTren...|           45|
| #너의_이모지가_들려|           39|
|                #BSC|           30|
|               #현진|           24|
|        #ranboorecap|           24|
|            #Hyunjin|           24|
+--------------------+-------------+

----------- 2021-12-31 04:02:13 -----------
+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
|              #MONEY|           45|
|               #Lisa|           45|
|                 #16|           45|
|        #BLACKPINK's|           45|
|#BillboardHotTren...|           45|
| #너의_이모지가_들려|           39|
|                #BSC|           30|
|               #현진|           24|
|     

----------- 2021-12-31 04:04:33 -----------
+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
| #너의_이모지가_들려|           54|
|              #MONEY|           51|
|#BillboardHotTren...|           51|
|               #Lisa|           51|
|                 #16|           51|
|        #BLACKPINK's|           51|
|            #Hyunjin|           30|
|                #BSC|           30|
|               #현진|           30|
|        #ranboorecap|           24|
+--------------------+-------------+

----------- 2021-12-31 04:04:43 -----------
+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
| #너의_이모지가_들려|           54|
|              #MONEY|           52|
|#BillboardHotTren...|           52|
|               #Lisa|           52|
|                 #16|           52|
|        #BLACKPINK's|           52|
|            #Hyunjin|           30|
|                #BSC|           30|
|   

----------- 2021-12-31 04:07:03 -----------
+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
|              #MONEY|           60|
|#BillboardHotTren...|           60|
| #너의_이모지가_들려|           60|
|               #Lisa|           60|
|                 #16|           60|
|        #BLACKPINK's|           60|
|            #Hyunjin|           30|
|                #BSC|           30|
|               #현진|           30|
|        #ranboorecap|           24|
+--------------------+-------------+

----------- 2021-12-31 04:07:13 -----------
+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
| #너의_이모지가_들려|           61|
|              #MONEY|           60|
|#BillboardHotTren...|           60|
|               #Lisa|           60|
|                 #16|           60|
|        #BLACKPINK's|           60|
|               #현진|           31|
|            #Hyunjin|           31|
|     

----------- 2021-12-31 04:09:33 -----------
+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
|              #MONEY|           69|
|#BillboardHotTren...|           69|
|               #Lisa|           69|
|                 #16|           69|
|        #BLACKPINK's|           69|
| #너의_이모지가_들려|           63|
|            #Hyunjin|           33|
|                #BSC|           33|
|               #현진|           33|
|        #ranboorecap|           24|
+--------------------+-------------+

----------- 2021-12-31 04:09:43 -----------
+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
|              #MONEY|           69|
|#BillboardHotTren...|           69|
|               #Lisa|           69|
|                 #16|           69|
|        #BLACKPINK's|           69|
| #너의_이모지가_들려|           63|
|            #Hyunjin|           33|
|                #BSC|           33|
|   

----------- 2021-12-31 04:12:03 -----------
+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
|              #MONEY|           81|
|                 #16|           81|
|               #Lisa|           81|
|        #BLACKPINK's|           81|
|#BillboardHotTren...|           81|
| #너의_이모지가_들려|           66|
|                #BSC|           36|
|            #Hyunjin|           33|
|               #현진|           33|
|        #ranboorecap|           28|
+--------------------+-------------+

----------- 2021-12-31 04:12:13 -----------
+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
|              #MONEY|           81|
|                 #16|           81|
|               #Lisa|           81|
|        #BLACKPINK's|           81|
|#BillboardHotTren...|           81|
| #너의_이모지가_들려|           67|
|                #BSC|           36|
|            #Hyunjin|           33|
|   

----------- 2021-12-31 04:14:33 -----------
+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
|              #MONEY|           90|
|                 #16|           90|
|               #Lisa|           90|
|        #BLACKPINK's|           90|
|#BillboardHotTren...|           90|
| #너의_이모지가_들려|           75|
|            #Hyunjin|           36|
|                #BSC|           36|
|               #현진|           36|
|        #ranboorecap|           33|
+--------------------+-------------+

----------- 2021-12-31 04:14:43 -----------
+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
|              #MONEY|           90|
|                 #16|           90|
|               #Lisa|           90|
|        #BLACKPINK's|           90|
|#BillboardHotTren...|           90|
| #너의_이모지가_들려|           75|
|            #Hyunjin|           36|
|                #BSC|           36|
|   

----------- 2021-12-31 04:17:03 -----------
+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
|              #MONEY|           90|
|                 #16|           90|
|               #Lisa|           90|
|        #BLACKPINK's|           90|
|#BillboardHotTren...|           90|
| #너의_이모지가_들려|           75|
|                #BSC|           45|
|        #ranboorecap|           37|
|            #Hyunjin|           36|
|               #현진|           36|
+--------------------+-------------+

----------- 2021-12-31 04:17:13 -----------
+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
|              #MONEY|           90|
|                 #16|           90|
|               #Lisa|           90|
|        #BLACKPINK's|           90|
|#BillboardHotTren...|           90|
| #너의_이모지가_들려|           75|
|                #BSC|           45|
|        #ranboorecap|           38|
|   

----------- 2021-12-31 04:19:33 -----------
+--------------------+-------------+
|             hashtag|hashtag_count|
+--------------------+-------------+
|              #MONEY|           99|
|                 #16|           99|
|               #Lisa|           99|
|        #BLACKPINK's|           99|
|#BillboardHotTren...|           99|
| #너의_이모지가_들려|           80|
|                #BSC|           48|
|        #ranboorecap|           39|
|            #Hyunjin|           36|
|               #현진|           36|
+--------------------+-------------+



21/12/31 04:19:43 ERROR DirectKafkaInputDStream: ArrayBuffer(java.nio.channels.ClosedChannelException)
