## Summary

## Install dependencies

In [None]:
# Detect whether the notebook is running on Google Colab by probing for the
# Colab-only package; the flag is consulted later to decide on installation.
try:
    import google.colab
except ImportError:
    GOOGLE_COLAB = False
else:
    GOOGLE_COLAB = True

In [None]:
# Install Apache Beam via a shell escape only when the Colab flag is set;
# other environments are presumably expected to provide it already.
if GOOGLE_COLAB:
    !pip install apache_beam

## Imports

In [None]:
import itertools
import tempfile

import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.runners.direct import direct_runner
from apache_beam.runners.interactive import interactive_runner

In [None]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

## Parameters

In [None]:
# Identifier for this notebook.
# NOTE(review): not referenced in the visible cells -- confirm it is still needed.
NOTEBOOK_NAME = "demo_pipeline_segmentation"

In [None]:
# Fresh scratch directory, passed to Beam as temp_location below.
# mkdtemp does not delete the directory; nothing in the visible cells removes it.
temp_dir = tempfile.mkdtemp(prefix="beam-temp-")

In [None]:
# Pipeline options: name the DirectRunner and point temp_location at the
# scratch directory, then print the options' display data as a sanity check.
options = PipelineOptions(runner="DirectRunner", temp_location=temp_dir)
print(options.display_data())

## Workflow

In [None]:
# Interactive runner used to execute the pipeline from the notebook.
# NOTE(review): render_option="graph" presumably makes each run display the
# pipeline graph -- confirm against the installed Beam version.
runner = interactive_runner.InteractiveRunner(render_option="graph")

In [None]:
# Build the pipeline with the interactive runner and the options defined earlier.
# NOTE(review): options also names "DirectRunner"; the explicit runner=
# argument presumably takes precedence -- confirm.
p = beam.Pipeline(runner=runner, options=options)

### Populate input cache

In [None]:
# Source PCollection holding the integers 0..9; every later branch reads from it.
input_pcoll = p | beam.Create(range(10))

In [None]:
# First run: only the Create step exists at this point.
# NOTE(review): presumably this materialises input_pcoll in the runner's cache
# so later runs can reuse it -- confirm.
result = p.run()

In [None]:
# Show at most the first five elements retrieved for the input PCollection.
head = itertools.islice(result.get(input_pcoll), 5)
for element in head:
    print(element)

### Squares

In [None]:
# Branch off the shared input: map each element to its square.
squares = input_pcoll | 'Square' >> beam.Map(lambda x: x*x)

In [None]:
# Re-run the pipeline now that the 'Square' step has been added.
# NOTE(review): presumably only the new step is computed, reusing the cached
# input -- confirm.
result = p.run()

In [None]:
# Show at most the first five elements retrieved for the squares PCollection.
head = itertools.islice(result.get(squares), 5)
for element in head:
    print(element)

### Cubes

In [None]:
# Second branch off the shared input: map each element to its cube.
cubes = input_pcoll | 'Cube' >> beam.Map(lambda x: x**3)

In [None]:
# Re-run the pipeline now that the 'Cube' step has been added; the plot cell
# reads both squares and cubes from this result.
result = p.run()

In [None]:
# Show at most the first five elements retrieved for the cubes PCollection.
head = itertools.islice(result.get(cubes), 5)
for element in head:
    print(element)

### Plot

In [None]:
from matplotlib import pyplot as plt

# x values mirror the range(10) fed into the pipeline.
init_list = list(range(10))

# Each series is sorted on its own. Pairing the sorted values with init_list
# works because squaring and cubing are both increasing on 0..9.
squares_list = sorted(result.get(squares))
cubes_list = sorted(result.get(cubes))

# Draw both series on the same axes, squares first, then cubes.
for series, series_label, series_color in (
    (squares_list, 'squares', 'red'),
    (cubes_list, 'cubes', 'blue'),
):
    plt.scatter(init_list, series, label=series_label, color=series_color)

plt.legend(loc='upper left')
plt.show()