In [1]:
import re
import apache_beam as beam
from apache_beam.runners.interactive.interactive_runner import InteractiveRunner
import apache_beam.runners.interactive.interactive_beam as ib

In [2]:
# Synthetic in-memory rows: two hand-written records followed by 1000
# generated ones whose "1" field cycles through 0..99.
small_data = [
    {"0": "son's", "1": 29},
    {"0": "to's", "1": 580},
]
small_data.extend({"0": "abc" + str(n), "1": n % 100} for n in range(1000))

# Dedicated pipeline (run by the InteractiveRunner) that holds those rows.
p_small = beam.Pipeline(InteractiveRunner())
pcoll_small = p_small | beam.Create(small_data)

In [3]:
class ReadWordsFromText(beam.PTransform):
    """Composite transform that reads text files and emits the words they contain.

    A word is any maximal run of word characters and apostrophes, so tokens
    such as "son's" are kept in one piece.
    """

    def __init__(self, file_pattern):
        """Remember which files to read.

        Args:
            file_pattern: Path or pattern handed unchanged to
                ``beam.io.ReadFromText``.
        """
        # Run the base PTransform initialisation before adding our own state.
        super().__init__()
        self._file_pattern = file_pattern

    def expand(self, pcoll):
        """Read the configured files and split every line into words.

        Args:
            pcoll: The value this transform is applied to. Only its
                ``pipeline`` attribute is used, so the read is always rooted
                at the pipeline itself.

        Returns:
            The collection produced by flat-mapping each stripped line to the
            list of regex matches found in it.
        """
        return (pcoll.pipeline
                | beam.io.ReadFromText(self._file_pattern)
                | beam.FlatMap(lambda line: re.findall(r'[\w\']+', line.strip(), re.UNICODE)))

In [4]:
# Pipeline (run by the InteractiveRunner) that the King Lear word-count
# transforms in the following cells are attached to.
p = beam.Pipeline(InteractiveRunner())

In [5]:
import random

# Lower-cased words of King Lear, read from the public Beam samples bucket.
words = (
    p
    | 'read' >> ReadWordsFromText('gs://apache-beam-samples/shakespeare/kinglear.txt')
    | 'lower' >> beam.Map(lambda token: token.lower())
)

# Pair every word with a random integer from 1 to 101 (randint includes both ends).
data = words | beam.Map(lambda token: (token, random.randint(1, 101)))

In [6]:
# Per-element occurrence count of `words`, labelled 'count' in the pipeline.
counts = words | 'count' >> beam.combiners.Count.PerElement()

In [7]:
def cross_join(left, rights):
    """Yield ``(left, item)`` for every item of ``rights`` equal to ``left[0]``.

    Despite the name, no full cross product is produced: ``left`` is only
    paired with the entries of ``rights`` that match its first field.

    Args:
        left: An indexable record whose first field is the join key.
        rights: An iterable of candidate keys; it is walked once per call.

    Yields:
        One ``(left, item)`` tuple per matching item, in iteration order.
    """
    # left[0] is looked up per item on purpose, so nothing is evaluated
    # when `rights` is empty.
    yield from ((left, item) for item in rights if left[0] == item)

In [8]:
# Words whose count exceeds 100, reduced to the bare word.
words_with_counts_bigger_than_100 = (
    counts
    | beam.Filter(lambda word_count: word_count[1] > 100)
    | beam.Map(lambda word_count: word_count[0])
)

# Pair each row of `data` with the matching frequent word; the frequent words
# reach `cross_join` as an iterable side input named `rights`.
data_with_word_counts_bigger_than_100 = data | beam.FlatMap(
    cross_join,
    rights=beam.pvalue.AsIter(words_with_counts_bigger_than_100),
)

In [9]:
import apache_beam as beam
from apache_beam.runners.interactive import interactive_runner
import apache_beam.runners.interactive.interactive_beam as ib
from apache_beam.transforms import trigger
from apache_beam.options import pipeline_options
from apache_beam.options.pipeline_options import GoogleCloudOptions
from datetime import timedelta
import google.auth
import json
import pandas as pd

# Public Pub/Sub topic this example reads taxi-ride messages from.
topic = "projects/pubsub-public-data/topics/taxirides-realtime"

# Pipeline options: streaming mode so data can be streamed from Pub/Sub, and
# the default project of the current Google Cloud environment, which is used
# for creating a subscription to the topic.
options = pipeline_options.PipelineOptions()
options.view_as(pipeline_options.StandardOptions).streaming = True
_, options.view_as(GoogleCloudOptions).project = google.auth.default()

# Interactive Beam capture duration: 30 seconds.
ib.options.capture_duration = timedelta(seconds=30)

p_taxi = beam.Pipeline(interactive_runner.InteractiveRunner(), options=options)

# Read the raw messages and decode each one with json.loads.
data = (
    p_taxi
    | "read" >> beam.io.ReadFromPubSub(topic=topic)
    | beam.Map(json.loads)
)

# Sliding windows of size 10 with a period of 1.
windowed_data = data | "window" >> beam.WindowInto(
    beam.window.SlidingWindows(size=10, period=1)
)

In [10]:
from datetime import timedelta
from apache_beam.options import pipeline_options
from apache_beam.options.pipeline_options import GoogleCloudOptions
import google.auth
# Pub/Sub topic read by the streaming word example below.
# NOTE: this rebinds `topic` and `options`, which an earlier cell already
# assigned for the taxi-ride pipeline.
topic = "projects/pubsub-public-data/topics/shakespeare-kinglear"

# Set up the Apache Beam pipeline options.
options = pipeline_options.PipelineOptions()
# Set the pipeline mode to streaming, so we can stream the data from Pub/Sub.
options.view_as(pipeline_options.StandardOptions).streaming = True
# Set the project to the default project in your current Google Cloud environment.
# The project will be used for creating a subscription to the Pub/Sub topic.
# google.auth.default() returns a pair; only its second item is stored.
_, options.view_as(GoogleCloudOptions).project = google.auth.default()

In [11]:
from apache_beam.runners.interactive import interactive_beam as ib

# Interactive Beam capture duration: 30 seconds.
ib.options.capture_duration = timedelta(seconds=30)

pstreaming = beam.Pipeline(InteractiveRunner(), options=options)

# NOTE: rebinds `words`, which an earlier cell bound to the batch King Lear
# word collection; from here on it refers to the windowed Pub/Sub stream.
words = (
    pstreaming
    | 'Read' >> beam.io.ReadFromPubSub(topic=topic)
    | 'Window' >> beam.WindowInto(beam.window.FixedWindows(size=5))
)

In [None]:
from bokeh.plotting import figure, output_notebook, show
from bokeh.embed import json_item
from bokeh.resources import INLINE

# Load BokehJS into the notebook from inline resources.
output_notebook(INLINE)

# Sample data for the line plot.
x = [1, 2, 3, 4, 5]
y = [6, 7, 2, 4, 5]

# New figure with a title and axis labels.
# NOTE: this rebinds `p`, which an earlier cell bound to a Beam pipeline.
p = figure(
    title="simple line example",
    x_axis_label='x',
    y_axis_label='y',
)

# show(p) is left disabled here.
# Line renderer with a legend entry and a line width of 2; kept as the last
# expression so the cell output is unchanged.
p.line(x, y, legend_label="Temp.", line_width=2)

In [None]:
import json
# Serialize the figure `p` as a Bokeh JSON item targeting the element id
# 'aplot', then dump it to a JSON string.
item_text = json.dumps(json_item(p, 'aplot'))

In [None]:
%%html

<!-- Load the BokehJS 2.1.0 bundles (core, widgets, tables, gl, api) from the Bokeh CDN. -->
<script type="text/javascript" src="https://cdn.bokeh.org/bokeh/release/bokeh-2.1.0.min.js" integrity="sha384-YobFyzPeVUsFQydHkJGsJL1kyfHnWxOlPc3EwaV22TmBaeGoXHLWx5aRRVPS9xlE" crossorigin="anonymous"></script>
<script type="text/javascript" src="https://cdn.bokeh.org/bokeh/release/bokeh-widgets-2.1.0.min.js" integrity="sha384-NuAg9+TcTQQqvQCTtkCneRrpkTiMhhfiq0KHiBzx8ECiKiLWXHN6i6ia3q7b3eHu" crossorigin="anonymous"></script>
<script type="text/javascript" src="https://cdn.bokeh.org/bokeh/release/bokeh-tables-2.1.0.min.js" integrity="sha384-uMVqQc8JqHitD67bXTn9a06Mrk3EiHRaZ18EJENQenAKJ/KL71SakdXYomZQpGRr" crossorigin="anonymous"></script>
<script type="text/javascript" src="https://cdn.bokeh.org/bokeh/release/bokeh-gl-2.1.0.min.js" integrity="sha384-u+eGuEXC8aw0VSCm2mH+b/tQEAitUOYiR1H6SuIVEdUmXsf4vN8m/SmXpmjb7U/X" crossorigin="anonymous"></script>
<script type="text/javascript" src="https://cdn.bokeh.org/bokeh/release/bokeh-api-2.1.0.min.js" integrity="sha384-xIZ6DggyeeSB9o8lHxPnUJ4VvsdRuiKdRbWJwKIyqMvCe2PSJUnosrCjOBeBVjou" crossorigin="anonymous"></script>

<!-- Container with id 'aplot'; the embedded plot item targets this element. -->
<div id='aplot'>
</div>

In [None]:
%%javascript
// Snapshot of a Bokeh JSON item (Bokeh 2.1.0) whose target element is "aplot".
// A template literal (backticks) is used because the text spans more than one
// line: a quoted string literal may not contain a raw line break, whereas
// JSON.parse treats the line break between array entries as plain whitespace.
// NOTE(review): presumably pasted from the Python `item_text` value — regenerate
// it if the figure changes.
let item_text = `{"target_id": "aplot", "root_id": "1785", "doc": {"roots": {"references": [{"attributes": {}, "id": "1792", "type": "LinearScale"}, {"attributes": {"axis_label": "y", "formatter": {"id": "1825"}, "ticker": {"id": "1801"}}, "id": "1800", "type": "LinearAxis"}, {"attributes": {}, "id": "1790", "type": "DataRange1d"}, {"attributes": {"label": {"value": "Temp."}, "renderers": [{"id": "1821"}]}, "id": "1832", "type": "LegendItem"}, {"attributes": {}, "id": "1807", "type": "SaveTool"}, {"attributes": {"below": [{"id": "1796"}], "center": [{"id": "1799"}, {"id": "1803"}, {"id": "1831"}], "left": [{"id": "1800"}], "renderers": [{"id": "1821"}], "title": {"id": "1786"}, "toolbar": {"id": "1811"}, "x_range": {"id": "1788"}, "x_scale": {"id": "1792"}, "y_range": {"id": "1790"}, "y_scale": {"id": "1794"}}, "id": "1785", "subtype": "Figure", "type": "Plot"}, {"attributes": {"data_source": {"id": "1818"}, "glyph": {"id": "1819"}, "hover_glyph": null, "muted_glyph": null, "nonselection_glyph": {"id": "1820"}, "selection_glyph": null, "view": {"id": "1822"}}, "id": "1821", "type": "GlyphRenderer"}, {"attributes": {}, "id": "1808", "type": "ResetTool"}, {"attributes": {}, "id": "1830", "type": "UnionRenderers"}, {"attributes": {}, "id": "1805", "type": "WheelZoomTool"}, {"attributes": {}, "id": "1827", "type": "BasicTickFormatter"}, {"attributes": {"overlay": {"id": "1810"}}, "id": "1806", "type": "BoxZoomTool"}, {"attributes": {"source": {"id": "1818"}}, "id": "1822", "type": "CDSView"}, {"attributes": {}, "id": "1809", "type": "HelpTool"}, {"attributes": {}, "id": "1794", "type": "LinearScale"}, {"attributes": {}, "id": "1825", "type": "BasicTickFormatter"}, {"attributes": {"bottom_units": "screen", "fill_alpha": 0.5, "fill_color": "lightgrey", "left_units": "screen", "level": "overlay", "line_alpha": 1.0, "line_color": "black", "line_dash": [4, 4], "line_width": 2, "right_units": "screen", "top_units": "screen"}, "id": "1810", "type": "BoxAnnotation"}, 
{"attributes": {}, "id": "1829", "type": "Selection"}, {"attributes": {"axis_label": "x", "formatter": {"id": "1827"}, "ticker": {"id": "1797"}}, "id": "1796", "type": "LinearAxis"}, {"attributes": {"data": {"x": [1, 2, 3, 4, 5], "y": [6, 7, 2, 4, 5]}, "selected": {"id": "1829"}, "selection_policy": {"id": "1830"}}, "id": "1818", "type": "ColumnDataSource"}, {"attributes": {"axis": {"id": "1800"}, "dimension": 1, "ticker": null}, "id": "1803", "type": "Grid"}, {"attributes": {"line_color": "#1f77b4", "line_width": 2, "x": {"field": "x"}, "y": {"field": "y"}}, "id": "1819", "type": "Line"}, {"attributes": {}, "id": "1797", "type": "BasicTicker"}, {"attributes": {"active_drag": "auto", "active_inspect": "auto", "active_multi": null, "active_scroll": "auto", "active_tap": "auto", "tools": [{"id": "1804"}, {"id": "1805"}, {"id": "1806"}, {"id": "1807"}, {"id": "1808"}, {"id": "1809"}]}, "id": "1811", "type": "Toolbar"}, {"attributes": {"line_alpha": 0.1, "line_color": "#1f77b4", "line_width": 2, "x": {"field": "x"}, "y": {"field": "y"}}, "id": "1820", "type": "Line"}, {"attributes": {"text": "simple line example"}, "id": "1786", "type": "Title"}, {"attributes": {}, "id": "1788", "type": "DataRange1d"}, {"attributes": {"items": [{"id": "1832"}]}, "id": "1831", "type": "Legend"}, {"attributes": {"axis": {"id": "1796"}, "ticker": null}, "id": "1799", "type": "Grid"}, {"attributes": {}, "id": "1804", "type": "PanTool"}, {"attributes": {}, "id": "1801", "type": "BasicTicker"}], "root_ids": ["1785"]}, "title": "", "version": "2.1.0"}}`;
// Parse the snapshot and render it into the element with id "aplot".
Bokeh.embed.embed_item(JSON.parse(item_text), "aplot");

In [None]:
from apache_beam.runners.interactive.display import pipeline_graph as pg
from apache_beam.runners.interactive import pipeline_fragment as pf

In [None]:
from apache_beam.runners.interactive import interactive_environment as ie

# Ask the current interactive environment's inspector for its inspectables;
# as the last expression of the cell, the result is shown as the cell output.
ie.current_env().inspector.list_inspectables()

In [None]:
# Fetch the data the inspector holds for one identifier.
# NOTE(review): the identifier is a hard-coded literal — presumably copied
# from an earlier list_inspectables() result; confirm it still exists in the
# current kernel session before relying on this cell.
ie.current_env().inspector.get_pcoll_data('eb436bfed4b4592f1eb790065817c629')

In [None]:
# Build a pipeline fragment from `counts` with PipelineFragment.deduce_fragment()
# and display the graph of that fragment.
pg.PipelineGraph(pf.PipelineFragment([counts]).deduce_fragment()).display_graph()

In [None]:
from apache_beam.runners.interactive import pipeline_instrument as pi

# Deduce the fragment for `counts`, pass it through build_pipeline_instrument,
# and display the graph of the result's `_pipeline` attribute.
# NOTE(review): `_pipeline` is a private (underscore-prefixed) attribute that
# is read directly here.
pg.PipelineGraph(pi.build_pipeline_instrument(pf.PipelineFragment([counts]).deduce_fragment())._pipeline).display_graph()

In [None]:
# Display the graph of the batch word-count pipeline. It is reached through
# `counts.pipeline` rather than `p` because an earlier cell rebinds `p` to a
# Bokeh figure, so on a top-to-bottom run `p` no longer refers to a pipeline.
pg.PipelineGraph(counts.pipeline).display_graph()