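## Summary

This notebook demonstrates explicit caching with the Apache Beam interactive `CacheManager`: one pipeline writes the integers 0–9 to an input cache, two further pipelines read that cache and write the squares and cubes of its elements to their own caches, and the cached results are then read back and plotted.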

## Install dependencies

In [None]:
# Detect whether the notebook is running on Google Colab: the `google.colab`
# package can only be imported there.
try:
    import google.colab
except ImportError:
    GOOGLE_COLAB = False
else:
    GOOGLE_COLAB = True

In [None]:
if GOOGLE_COLAB:
    !pip install "git+https://github.com/ostrokach/beam.git@develop#egg=apache_beam[gcp]&subdirectory=sdks/python"

## Imports

In [None]:
import itertools
import tempfile

import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.runners.interactive.cache_manager import CacheManager
from matplotlib import pyplot as plt

In [None]:
# Display the version of the imported Apache Beam package.
beam.__version__

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

## Parameters

In [None]:
# Name of this notebook.
# NOTE(review): not referenced by any other cell visible in this notebook.
NOTEBOOK_NAME = "demo_explicit_caching"

In [None]:
# Create a fresh scratch directory whose name starts with "beam-temp-"; it is
# passed to Beam as `temp_location` when the pipeline options are built.
# `mkdtemp` does not delete the directory; cleanup is left to the user.
temp_dir = tempfile.mkdtemp(prefix="beam-temp-")
temp_dir  # show the generated path as the cell output

In [None]:
# Pipeline options shared by every pipeline in this notebook: run on the
# DirectRunner and use the scratch directory as the temp location.
options = PipelineOptions(runner="DirectRunner", temp_location=temp_dir)
# Print the options' display data so the effective settings are visible.
print(options.display_data())

In [None]:
# Cache manager constructed from the pipeline options; the workflow cells call
# `create_cache_from_defaults()` on it to obtain each cache.
cache_manager = CacheManager(options)

## Workflow

### Populate input cache

In [None]:
# Obtain a new cache from the manager; it will hold the seed values.
input_cache = cache_manager.create_cache_from_defaults()

# Leaving the `with` block runs the pipeline, which writes the integers 0-9
# through the cache's writer transform.
with beam.Pipeline(options=options) as pipeline:
    seed_values = pipeline | beam.Create(list(range(10)))
    _ = seed_values | input_cache.writer()

In [None]:
# Print at most the first five elements yielded by input_cache.read().
first_inputs = itertools.islice(input_cache.read(), 5)
for element in first_inputs:
    print(element)

### Squares

In [None]:
# Obtain a separate cache for the squared values.
squares_cache = cache_manager.create_cache_from_defaults()

# Read the cached inputs, square each element, and write the results through
# the squares cache's writer transform.
with beam.Pipeline(options=options) as pipeline:
    cached_inputs = pipeline | input_cache.reader()
    squares = (
        cached_inputs
        | beam.Map(lambda value: value ** 2)
        | squares_cache.writer()
    )

In [None]:
# Print at most the first five elements yielded by squares_cache.read().
first_squares = itertools.islice(squares_cache.read(), 5)
for element in first_squares:
    print(element)

<span style="color: blue">**TODO:** Should a cache's default `.expand()` delegate to `.reader()`, so that a cache can be applied to a pipeline directly?</span>

### Cubes

In [None]:
# Obtain a separate cache for the cubed values.
cubes_cache = cache_manager.create_cache_from_defaults()

# Read the cached inputs, cube each element, and write the results through the
# cubes cache's writer transform.
with beam.Pipeline(options=options) as p:
    # Bound to `cubes` (not `squares`) so this cell neither mislabels its
    # result nor overwrites the variable assigned in the Squares cell.
    cubes = (
        p
        | input_cache.reader()
        | beam.Map(lambda e: e ** 3)
        | cubes_cache.writer()
    )

In [None]:
# Print at most the first five elements yielded by cubes_cache.read().
first_cubes = itertools.islice(cubes_cache.read(), 5)
for element in first_cubes:
    print(element)

### Plot

In [None]:
# Take the x values from the input cache itself instead of repeating the
# literal range that was used to populate it.
init_list = sorted(input_cache.read())
# Each list is sorted independently, so x/y pairs are matched by rank. That is
# valid here because squaring and cubing are monotonic on the non-negative
# inputs; it would not hold for inputs that include negative numbers.
squares_list = sorted(squares_cache.read())
cubes_list = sorted(cubes_cache.read())

# Explicit figure/axes objects, with a title and axis labels so the figure can
# be understood on its own.
fig, ax = plt.subplots()
ax.scatter(init_list, squares_list, label='squares', color='red')
ax.scatter(init_list, cubes_list, label='cubes', color='blue')
ax.set(
    title='Squares and cubes of the cached inputs',
    xlabel='input value',
    ylabel='transformed value',
)
ax.legend(loc='upper left')
plt.show()