In [1]:
import apache_beam as beam
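# beam.FlatMap():
#   Applies a function that takes a single input element and returns a list of output elements;
#   each item of that list becomes its own element in the result.
#   Example: take a string and return a list of strings.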
# FlatMap with a built-in callable: split whitespace-separated strings into words.
with beam.Pipeline() as pipeline:
  # NOTE: `plants` is bound to the result of the whole chain, i.e. the output of
  # the final `beam.Map(print)` step, and is not used afterwards in this cell.
  plants = (
      pipeline
      # Two input elements, each a single string of space-separated plant names.
      | 'Gardening plants' >> beam.Create([
          '🍓Strawberry 🥕Carrot 🍆Eggplant',
          '🍅Tomato 🥔Potato',
      ])
      # str.split with no separator splits on whitespace and returns a list;
      # FlatMap emits every item of that list as a separate element.
      | 'Split words' >> beam.FlatMap(str.split)
      # Print each resulting element on its own line.
      | beam.Map(print))



🍓Strawberry
🥕Carrot
🍆Eggplant
🍅Tomato
🥔Potato


In [2]:
import apache_beam as beam

#Example of using beam.FlatMap with function
def split_words(text):
  """Split a comma-separated string into its parts.

  Args:
    text: the string to split.

  Returns:
    The list produced by splitting `text` on every ','.
  """
  comma = ','
  return text.split(comma)

# FlatMap with a named function: split comma-separated strings into words.
with beam.Pipeline() as pipeline:
  # NOTE: `plants` is bound to the result of the whole chain, i.e. the output of
  # the final `beam.Map(print)` step, and is not used afterwards in this cell.
  plants = (
      pipeline
      # Two input elements, each a single string of comma-separated plant names.
      | 'Gardening plants' >> beam.Create([
          '🍓Strawberry,🥕Carrot,🍆Eggplant',
          '🍅Tomato,🥔Potato',
      ])
      # split_words returns a list; FlatMap emits every item as a separate element.
      | 'Split words' >> beam.FlatMap(split_words)
      # Print each resulting element on its own line.
      | beam.Map(print))

🍓Strawberry
🥕Carrot
🍆Eggplant
🍅Tomato
🥔Potato


In [3]:
import apache_beam as beam

#Example of using beam.FlatMap with lambda function.
# FlatMap with a lambda: flatten elements that are already lists.
with beam.Pipeline() as pipeline:
  # NOTE: `plants` is bound to the result of the whole chain, i.e. the output of
  # the final `beam.Map(print)` step, and is not used afterwards in this cell.
  plants = (
      pipeline
      # Two input elements, each a list of plant-name strings.
      | 'Gardening plants' >> beam.Create([
          ['🍓Strawberry', '🥕Carrot', '🍆Eggplant'],
          ['🍅Tomato', '🥔Potato'],
      ])
      # The lambda returns its input list unchanged; FlatMap then emits each
      # item of that list as a separate element.
      | 'Flatten lists' >> beam.FlatMap(lambda elements: elements)
      # Print each resulting element on its own line.
      | beam.Map(print))

🍓Strawberry
🥕Carrot
🍆Eggplant
🍅Tomato
🥔Potato


In [4]:
import apache_beam as beam
#Example of using beam.FlatMap with generator function
def generate_elements(elements):
  """Yield every item of `elements`, one at a time and in order.

  Args:
    elements: any iterable.

  Yields:
    Each item of `elements`, unchanged.
  """
  yield from elements

# FlatMap with a generator function: flatten elements that are already lists.
with beam.Pipeline() as pipeline:
  # NOTE: `plants` is bound to the result of the whole chain, i.e. the output of
  # the final `beam.Map(print)` step, and is not used afterwards in this cell.
  plants = (
      pipeline
      # Two input elements, each a list of plant-name strings.
      | 'Gardening plants' >> beam.Create([
          ['🍓Strawberry', '🥕Carrot', '🍆Eggplant'],
          ['🍅Tomato', '🥔Potato'],
      ])
      # generate_elements yields the items of each list one by one; every
      # yielded item becomes a separate element.
      | 'Flatten lists' >> beam.FlatMap(generate_elements)
      # Print each resulting element on its own line.
      | beam.Map(print))

🍓Strawberry
🥕Carrot
🍆Eggplant
🍅Tomato
🥔Potato


In [5]:
import apache_beam as beam

#Example of using beam.FlatMapTuple()
  #FlatMapTuple knows each element is a tuple, so each item inside the tuple is passed to the function as a separate argument
def format_plant(icon, plant):
  """Yield `icon` and `plant` joined together, or nothing when `icon` is falsy.

  Args:
    icon: prefix for the plant name; a falsy value (e.g. None or '') means
      the pair is skipped.
    plant: the plant name.

  Yields:
    At most one string: `icon` immediately followed by `plant`.
  """
  # Guard clause: pairs without an icon produce no output at all.
  if not icon:
    return
  label = '{}{}'.format(icon, plant)
  yield label

# FlatMapTuple: each (icon, name) tuple is handed to format_plant as two arguments.
with beam.Pipeline() as pipeline:
  # NOTE: `plants` is bound to the result of the whole chain, i.e. the output of
  # the final `beam.Map(print)` step, and is not used afterwards in this cell.
  plants = (
      pipeline
      # Input elements are 2-tuples of (icon, plant name).
      | 'Gardening plants' >> beam.Create([
          ('🍓', 'Strawberry'),
          ('🥕', 'Carrot'),
          ('🍆', 'Eggplant'),
          ('🍅', 'Tomato'),
          ('🥔', 'Potato'),
          # The icon is None here, so format_plant yields nothing for this pair.
          (None, 'Invalid'),
      ])
      | 'Format' >> beam.FlatMapTuple(format_plant)
      # Print each resulting element on its own line.
      | beam.Map(print))

🍓Strawberry
🥕Carrot
🍆Eggplant
🍅Tomato
🥔Potato


In [6]:
import apache_beam as beam

#Example of using beam.FlatMap() with a function that takes additional inputs besides the PCollection element
def split_words(text, delimiter=None):
  """Split `text` on `delimiter`.

  Args:
    text: the string to split.
    delimiter: separator handed straight to `text.split`; with the default
      None, `str.split` splits on runs of whitespace.

  Returns:
    The list of pieces produced by `text.split(delimiter)`.
  """
  words = text.split(delimiter)
  return words

# FlatMap with an extra keyword argument forwarded to the function.
with beam.Pipeline() as pipeline:
  # NOTE: `plants` is bound to the result of the whole chain, i.e. the output of
  # the final `beam.Map(print)` step, and is not used afterwards in this cell.
  plants = (
      pipeline
      # Two input elements, each a single string of comma-separated plant names.
      | 'Gardening plants' >> beam.Create([
          '🍓Strawberry,🥕Carrot,🍆Eggplant',
          '🍅Tomato,🥔Potato',
      ])
      # `delimiter=','` is given to FlatMap after the function and is meant to be
      # received by split_words as its `delimiter` parameter for every element.
      | 'Split words' >> beam.FlatMap(split_words, delimiter=',')
      # Print each resulting element on its own line.
      | beam.Map(print))

🍓Strawberry
🥕Carrot
🍆Eggplant
🍅Tomato
🥔Potato


In [7]:
import apache_beam as beam

#Example of beam.FlatMap() where an extra input is a singleton side input taken from another PCollection
# FlatMap with a side input: the delimiter comes from another PCollection.
with beam.Pipeline() as pipeline:
  # A second collection created in the same pipeline, holding exactly one value: ','.
  delimiter = pipeline | 'Create delimiter' >> beam.Create([','])

  # NOTE: `plants` is bound to the result of the whole chain, i.e. the output of
  # the final `beam.Map(print)` step, and is not used afterwards in this cell.
  plants = (
      pipeline
      # Two input elements, each a single string of comma-separated plant names.
      | 'Gardening plants' >> beam.Create([
          '🍓Strawberry,🥕Carrot,🍆Eggplant',
          '🍅Tomato,🥔Potato',
      ])
      # The next two source lines form ONE lambda: `lambda text, delimiter: ...`.
      # Inside the lambda, `delimiter` is the lambda's own parameter, not the
      # collection named `delimiter` above; AsSingleton wraps that collection so
      # that its single value is what gets passed as the keyword argument.
      | 'Split words' >> beam.FlatMap(
          lambda text,
          delimiter: text.split(delimiter),
          delimiter=beam.pvalue.AsSingleton(delimiter),
      )
      # Print each resulting element on its own line.
      | beam.Map(print))

🍓Strawberry
🥕Carrot
🍆Eggplant
🍅Tomato
🥔Potato


In [None]:
import apache_beam as beam

#Example of using beam.FlatMap with a side input passed as an iterable (AsIter)
def normalize_and_validate_durations(plant, valid_durations):
  """Lower-case a plant's duration and emit the plant only if it is valid.

  The input dict is never modified. Beam requires that a processing function
  does not mutate the element it receives, so the normalized value is written
  to a copy instead of back into `plant`.

  Args:
    plant: dict with at least a 'duration' key whose value is a string.
    valid_durations: container or iterable of accepted lower-case duration
      strings; membership is checked with `in`.

  Yields:
    At most one dict: a shallow copy of `plant` whose 'duration' is the
    lower-cased value. Nothing is yielded when the duration is not valid.

  Raises:
    KeyError: if `plant` has no 'duration' key.
  """
  duration = plant['duration'].lower()
  if duration in valid_durations:
    # dict(plant, duration=...) copies the mapping and keeps the original key
    # order, so the printed output matches what the in-place version produced.
    yield dict(plant, duration=duration)

# FlatMap with a side input: the list of valid durations comes from another PCollection.
with beam.Pipeline() as pipeline:
  # A second collection created in the same pipeline, holding the accepted
  # (lower-case) duration names.
  valid_durations = pipeline | 'Valid durations' >> beam.Create([
      'annual',
      'biennial',
      'perennial',
  ])

  # `valid_plants` is bound to the result of the whole chain, i.e. the output of
  # the final `beam.Map(print)` step.
  valid_plants = (
      pipeline
      # Input elements are dicts with 'icon', 'name' and 'duration' keys; the
      # durations deliberately use mixed capitalisation.
      | 'Gardening plants' >> beam.Create([
          {
              'icon': '🍓', 'name': 'Strawberry', 'duration': 'Perennial'
          },
          {
              'icon': '🥕', 'name': 'Carrot', 'duration': 'BIENNIAL'
          },
          {
              'icon': '🍆', 'name': 'Eggplant', 'duration': 'perennial'
          },
          {
              'icon': '🍅', 'name': 'Tomato', 'duration': 'annual'
          },
          {
              # 'unknown' is not among the valid durations created above.
              'icon': '🥔', 'name': 'Potato', 'duration': 'unknown'
          },
      ])
      # AsIter wraps the `valid_durations` collection so that it is passed to the
      # function as the `valid_durations` keyword argument.
      | 'Normalize and validate durations' >> beam.FlatMap(
          normalize_and_validate_durations,
          valid_durations=beam.pvalue.AsIter(valid_durations),
      )
      # Print each resulting element on its own line.
      | beam.Map(print))