In [1]:
import requests
import pandas as pd
import apache_beam as beam

In [2]:
def fetch_data(url):
    try:
        response = requests.get(url)
        if response.status_code == 200:
            return response.json()
        else:
            print(f'Error {response.status_code}')
            return None
    except requests.exceptions.RequestException as e:
        print(f'Error {e}')
        return None

In [3]:
def fetch_first_50_pokemons(url):
    fetched_data = fetch_data(url)
    first_50_fetched_pokemons = fetched_data['pokemon'][:50]
    first_50_pokemon_urls = [pokemon['pokemon']['url'] for pokemon in first_50_fetched_pokemons]
    
    first_50_pokemons = []

    for pokemon_url in first_50_pokemon_urls:
        pokemon_data = fetch_data(pokemon_url)
        first_50_pokemons.append({
            'id': pokemon_data['id'],
            'name': pokemon_data['name'],
            'height': pokemon_data['height'],
            'weight': pokemon_data['weight']
        })

    return first_50_pokemons

In [4]:
api_url = 'https://pokeapi.co/api/v2/type/3'

first_50_pokemons = fetch_first_50_pokemons(api_url)
first_50_pokemons

[{'id': 6, 'name': 'charizard', 'height': 17, 'weight': 905},
 {'id': 12, 'name': 'butterfree', 'height': 11, 'weight': 320},
 {'id': 16, 'name': 'pidgey', 'height': 3, 'weight': 18},
 {'id': 17, 'name': 'pidgeotto', 'height': 11, 'weight': 300},
 {'id': 18, 'name': 'pidgeot', 'height': 15, 'weight': 395},
 {'id': 21, 'name': 'spearow', 'height': 3, 'weight': 20},
 {'id': 22, 'name': 'fearow', 'height': 12, 'weight': 380},
 {'id': 41, 'name': 'zubat', 'height': 8, 'weight': 75},
 {'id': 42, 'name': 'golbat', 'height': 16, 'weight': 550},
 {'id': 83, 'name': 'farfetchd', 'height': 8, 'weight': 150},
 {'id': 84, 'name': 'doduo', 'height': 14, 'weight': 392},
 {'id': 85, 'name': 'dodrio', 'height': 18, 'weight': 852},
 {'id': 123, 'name': 'scyther', 'height': 15, 'weight': 560},
 {'id': 130, 'name': 'gyarados', 'height': 65, 'weight': 2350},
 {'id': 142, 'name': 'aerodactyl', 'height': 18, 'weight': 590},
 {'id': 144, 'name': 'articuno', 'height': 17, 'weight': 554},
 {'id': 145, 'name': 

In [5]:
pokemons_df = pd.DataFrame(first_50_pokemons)
filepath = 'pokemons.csv'
pokemons_df.to_csv(filepath, index=False)

In [6]:
class ConvertUnits(beam.DoFn):
    def process(self, element):
        # Convert height from inches to metres
        element['height'] = element['height'] * 0.0254
        # Convert weight from lbs to kgs
        element['weight'] = element['weight'] * 0.453592
        yield element

class CalculateBMI(beam.DoFn):
    def process(self, element):
        # Calculate BMI
        bmi = element['weight'] / (element['height'] ** 2)
        # Round BMI to two decimal places
        element['bmi'] = round(bmi, 2)
        yield element

In [7]:
# Create a pipeline using Apache Beam
with beam.Pipeline() as pipeline:
    transformed_data = (
        pipeline
        | 'Create Pokemon Collections' >> beam.Create(first_50_pokemons)
        | 'Convert Units' >> beam.ParDo(ConvertUnits())
        | 'Calculate BMI' >> beam.ParDo(CalculateBMI())
    )

    # get the first 50 rows
    first_50_rows = transformed_data | beam.combiners.Sample.FixedSizeGlobally(50)

    first_50_rows | 'Print Output' >> beam.Map(print)



[{'id': 130, 'name': 'gyarados', 'height': 1.651, 'weight': 1065.9412, 'bmi': 391.06}, {'id': 144, 'name': 'articuno', 'height': 0.43179999999999996, 'weight': 251.289968, 'bmi': 1347.75}, {'id': 165, 'name': 'ledyba', 'height': 0.254, 'weight': 48.987936, 'bmi': 759.31}, {'id': 164, 'name': 'noctowl', 'height': 0.4064, 'weight': 185.065536, 'bmi': 1120.52}, {'id': 226, 'name': 'mantine', 'height': 0.5334, 'weight': 997.9024, 'bmi': 3507.37}, {'id': 6, 'name': 'charizard', 'height': 0.43179999999999996, 'weight': 410.50076, 'bmi': 2201.65}, {'id': 146, 'name': 'moltres', 'height': 0.508, 'weight': 272.1552, 'bmi': 1054.6}, {'id': 142, 'name': 'aerodactyl', 'height': 0.4572, 'weight': 267.61928, 'bmi': 1280.28}, {'id': 149, 'name': 'dragonite', 'height': 0.5588, 'weight': 952.5432, 'bmi': 3050.51}, {'id': 189, 'name': 'jumpluff', 'height': 0.2032, 'weight': 13.607759999999999, 'bmi': 329.56}, {'id': 17, 'name': 'pidgeotto', 'height': 0.2794, 'weight': 136.0776, 'bmi': 1743.15}, {'id': 1