In [3]:
import apache_beam as beam 

with beam.Pipeline() as pipeline:
    outputs = (
        pipeline
        | beam.Create([
            '  Apple  \n', 
            '\tMango   \n', 
            '     Dates  \n', 
            '\tGrapes     ', 
            '\n Papaya \t', 
            '\t   Strawberry   \n'])
        | beam.Map(lambda fruit:fruit.strip())
        | beam.Map(print)
    )

Apple
Mango
Dates
Grapes
Papaya
Strawberry


In [4]:
def strip_extra_chars(element):
    return element.strip()

with beam.Pipeline() as pipeline:
    outputs = (
        pipeline
        | beam.Create([
            '  Apple  \n', 
            '\tMango   \n', 
            '     Dates  \n', 
            '\tGrapes     ', 
            '\n Papaya \t', 
            '\t   Strawberry   \n'])
        | beam.Map(strip_extra_chars)
        | beam.Map(print)
    )

Apple
Mango
Dates
Grapes
Papaya
Strawberry


In [10]:
def strip_extra_chars(element, chars=None):
    tr1 = element.strip(chars)
    tr2 = tr1.title()
    tr3 = tr2.upper()
    return tr3

with beam.Pipeline() as pipeline:
    outputs = (
        pipeline
        | beam.Create([
            '  Apple.  \n', 
            '\tMango   \n', 
            '     Dates.  \n', 
            '\tgrapes.     ', 
            '\n Papaya \t', 
            '\t   Strawberry   \n'])
        | beam.Map(strip_extra_chars, chars='\t\n .')
        | beam.Map(print)
    )

APPLE
MANGO
DATES
GRAPES
PAPAYA
STRAWBERRY


In [17]:
# If our PCollection consists of (key, value) pairs, we can use MapTuple to unpack them into different function arguments

with beam.Pipeline() as pipeline:
    outputs = (
        pipeline
        | beam.Create([
            ('WB', 'Kolkata'),
            ('Maharshtra', 'Mumbai'),
            ('Karnataka', 'Bangalore'),
            ('Tamilnadu', 'Chennai')
            ])
        | beam.MapTuple(lambda state, cap: f'{state} --> {cap}')
        | beam.Map(print)
    )

WB --> Kolkata
Maharshtra --> Mumbai
Karnataka --> Bangalore
Tamilnadu --> Chennai


In [22]:
def split_tuple(element):
    state, cap = element 
    # return {'state' : state, 'capital' : cap}
    return state + " --> " + cap

with beam.Pipeline() as pipeline:
    outputs = (
        pipeline
        | beam.Create([
            ('WB', 'Kolkata'),
            ('Maharshtra', 'Mumbai'),
            ('Karnataka', 'Bangalore'),
            ('Tamilnadu', 'Chennai')
            ])
        | beam.Map(split_tuple)
        # | beam.Map(lambda x: x['state'] + ' --> ' + x['capital'])
        | beam.Map(print)
    )

WB --> Kolkata
Maharshtra --> Mumbai
Karnataka --> Bangalore
Tamilnadu --> Chennai


# FlatMap

In [26]:
with beam.Pipeline() as pipeline:
    outputs = (
        pipeline
        | beam.Create([
            '  Apple  \n', 
            '\tMango   \n', 
            '     Dates  \n', 
            '\tGrapes     ', 
            '\n Papaya \t', 
            '\t   Strawberry   \n'])
        | beam.FlatMap(str.split)
        | beam.Map(print)
    )

Apple
Mango
Dates
Grapes
Papaya
Strawberry


In [33]:
with beam.Pipeline() as pipeline:
    flowers = (
        pipeline
        | beam.Create(['Lotus, Lily , Daisy, Hibiscus, Marigold', 'Fragaria, Wild Rose, Rhododendron, Primula'])
        | beam.Map(lambda x : x.split(','))
        | beam.Map(print)
    )

['Lotus', ' Lily ', ' Daisy', ' Hibiscus', ' Marigold']
['Fragaria', ' Wild Rose', ' Rhododendron', ' Primula']


In [39]:
with beam.Pipeline() as pipeline:
    flowers = (
        pipeline
        | beam.Create(['Lotus, Lily , Daisy, Hibiscus, Marigold', 'Fragaria, Wild Rose, Rhododendron, Primula'])
        | beam.Map(lambda x : x.replace(" ", ""))
        | beam.FlatMap(lambda x : x.split(','))
        | beam.Map(print)
    )

Lotus
Lily
Daisy
Hibiscus
Marigold
Fragaria
WildRose
Rhododendron
Primula


In [41]:
with beam.Pipeline() as pipeline:
    flowers = (
        pipeline
        | beam.Create([
            ['Lotus', 'Lily' , 'Daisy', 'Hibiscus', 'Marigold'], 
            ['Fragaria', 'Wild Rose', 'Rhododendron', 'Primula']
            ])
        | beam.Map(lambda x : x)
        | beam.Map(print)
    )

['Lotus', 'Lily', 'Daisy', 'Hibiscus', 'Marigold']
['Fragaria', 'Wild Rose', 'Rhododendron', 'Primula']


In [42]:
with beam.Pipeline() as pipeline:
    flowers = (
        pipeline
        | beam.Create([
            ['Lotus', 'Lily' , 'Daisy', 'Hibiscus', 'Marigold'], 
            ['Fragaria', 'Wild Rose', 'Rhododendron', 'Primula']
            ])
        | beam.FlatMap(lambda x : x)
        | beam.Map(print)
    )

Lotus
Lily
Daisy
Hibiscus
Marigold
Fragaria
Wild Rose
Rhododendron
Primula


# Filter

In [44]:
with beam.Pipeline() as pipeline:
    ip = (
        pipeline
        | beam.Create([
            {'name' : 'Belgium', 'continent' : 'Europe', 'capital' : 'Brussels'},
            {'name' : 'India', 'continent' : 'Asia', 'capital' : 'New Delhi'},
            {'name' : 'New Zealand', 'continent' : 'Oceania', 'capital' : 'Auckland'},
            {'name' : 'USA', 'continent' : 'NA', 'capital' : 'Washington DC'},
        ])
        | beam.Filter(lambda x: x['continent'] == 'Asia')
        | beam.Map(print)
    )

{'name': 'India', 'continent': 'Asia', 'capital': 'New Delhi'}


In [47]:
x = ['a', 'p', 'c', 'o', 'l', 'y', 't', 'e']
x[1], x[6]

y = {'k1': 'v1', 'k2': 'v2'}
y['k2']

'v2'