## The data streaming classic: WordCount!!

We cannot finish this tutorial without a classic streaming example: WordCount. The WordCount example reads a text and counts how often each word occurs.

In [32]:
from dispel4py.core import GenericPE

class SplitLines(GenericPE):
    """Processing element that breaks a block of text into separate lines.

    Declares one input port named ``input`` and one output port named
    ``output``.
    """

    def __init__(self):
        GenericPE.__init__(self)
        self._add_input("input")
        self._add_output("output")

    def _process(self, inputs):
        """Write each line of ``inputs["input"]`` to ``output`` as its own item.

        The text is divided with ``splitlines()``, so the emitted lines do
        not carry their line terminators.
        """
        text = inputs["input"]
        rows = text.splitlines()
        for row in rows:
            self.write("output", row)

In [33]:
from dispel4py.base import IterativePE

class SplitWords(IterativePE):
    """Processing element that tokenises a line of text into words.

    For every word found in the incoming data item a ``(word, 1)`` tuple is
    written to ``output``, ready to be summed by a downstream counter.
    """

    def __init__(self):
        IterativePE.__init__(self)

    def _process(self, data):
        """Emit one ``(word, 1)`` tuple per whitespace-separated word in ``data``.

        ``data`` is expected to be a string (one line of text).
        """
        # split() with no separator splits on runs of any whitespace and
        # never yields empty strings. split(" ") would emit '' for leading,
        # trailing or repeated spaces, and those would be counted as words.
        for word in data.split():
            self.write("output", (word, 1))

In [34]:
from collections import defaultdict

class CountWords(GenericPE):
    """Processing element that keeps a running total for every word it sees.

    Each incoming item is unpacked as a ``(word, count)`` pair. Totals are
    accumulated in ``self.count`` and written out once, by ``_postprocess``.
    """

    def __init__(self):
        GenericPE.__init__(self)
        # grouping=[0] refers to element 0 of the incoming tuple (the word);
        # presumably this routes equal words to the same instance when the
        # PE is run in parallel -- confirm against the dispel4py docs.
        self._add_input("input", grouping=[0])
        self._add_output("output")
        # Missing words start at 0 thanks to defaultdict(int).
        self.count = defaultdict(int)

    def _process(self, inputs):
        """Add the count carried by the incoming pair to that word's total."""
        token, occurrences = inputs['input']
        self.count[token] += occurrences

    def _postprocess(self):
        """Write the accumulated word -> total mapping to ``output``.

        Presumably invoked by the framework after the input is exhausted.
        """
        self.write('output', self.count)

In [35]:
from dispel4py.workflow_graph import WorkflowGraph

# Create one instance of each processing element (PE) defined above.
split = SplitLines()
words = SplitWords()
count = CountWords()

# Chain the PEs into a pipeline: SplitLines -> SplitWords -> CountWords.
# Each connect() call links the 'output' port of the first PE to the
# 'input' port of the second.
graph = WorkflowGraph()
graph.connect(split, 'output', words, 'input')
graph.connect(words, 'output', count, 'input')


In [36]:
from dispel4py.new.simple_process import process as simple_process
# Execute the graph with simple_process. The second argument maps a PE
# instance to the list of data items to feed it; here a single item is
# delivered to the 'input' port of `split`.
simple_process(
    graph,
    {split: [{'input': "Hello Hello algo mas World World"}]},
)

Inputs: {'SplitLines15': [{'input': 'Hello Hello algo mas World World'}]}
SimplePE: Processed 1 iteration.
Outputs: {'CountWords17': {'output': [defaultdict(<type 'int'>, {'World': 2, 'mas': 1, 'Hello': 2, 'algo': 1})]}}
