Skip to content

Commit

Permalink
clean up examples
Browse files Browse the repository at this point in the history
  • Loading branch information
giwa committed Aug 18, 2014
1 parent b8d7d24 commit 189dcea
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 17 deletions.
10 changes: 4 additions & 6 deletions examples/src/main/python/streaming/network_wordcount.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,10 @@
# Streaming word count over a TCP socket source.
# NOTE(review): this chunk is the body of examples/src/main/python/streaming/
# network_wordcount.py; the imports (sys, operator.add, pyspark classes) and
# the argv check live above the visible region.
# 1-second micro-batches; `conf` is built earlier in the script.
ssc = StreamingContext(conf=conf, duration=Seconds(1))

# argv[1] = hostname, argv[2] = port of the text source to connect to.
lines = ssc.socketTextStream(sys.argv[1], int(sys.argv[2]))
# Classic word count: split each line into words, pair each word with 1,
# then sum the 1s per word within each batch.
words = lines.flatMap(lambda line: line.split(" "))
mapped_words = words.map(lambda word: (word, 1))
count = mapped_words.reduceByKey(add)

# Print each batch's (word, count) pairs to stdout.
# (pyprint was the early name of this API -- later renamed pprint.)
count.pyprint()
ssc.start()
ssc.awaitTermination()
15 changes: 4 additions & 11 deletions examples/src/main/python/streaming/wordcount.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,14 @@
exit(-1)
# Streaming word count over a monitored directory (file stream source).
# NOTE(review): this chunk is the body of examples/src/main/python/streaming/
# wordcount.py; the imports (sys, operator.add, pyspark classes) and the
# argv check (with its exit(-1)) live above the visible region.
conf = SparkConf()
conf.setAppName("PythonStreamingWordCount")

# 1-second micro-batches.
ssc = StreamingContext(conf=conf, duration=Seconds(1))

# argv[1] = directory to monitor; new files dropped there become batches.
lines = ssc.textFileStream(sys.argv[1])
# Classic word count: split each line into words, pair each word with 1,
# then sum the 1s per word within each batch.
words = lines.flatMap(lambda line: line.split(" "))
mapped_words = words.map(lambda x: (x, 1))
count = mapped_words.reduceByKey(add)

# Print each batch's (word, count) pairs to stdout.
# (pyprint was the early name of this API -- later renamed pprint.)
count.pyprint()
ssc.start()
ssc.awaitTermination()

0 comments on commit 189dcea

Please sign in to comment.