# Setup

In [3]:
import requests
import json
import pandas as pd
import duckdb

def api_request(endpoint):
    url = f'https://pokeapi.co/api/v2/{endpoint}/'
    response = requests.get(url)
    data = response.json()
    return data

def get_gen_poke(gen):
    data = api_request(f'pokedex/{gen}')
    df = pd.json_normalize(data['pokemon_entries'])
    return df

def get_poke_encounters(id):
    return api_request(f'pokemon/{id}/encounters')

def get_region_locations(name):
    data = api_request(f'region/{name}')
    df = pd.json_normalize(data['locations'])
    return df

# Main

In [4]:
# 1. Get pokemon from emerald generation
poke_entries = get_gen_poke('hoenn')
poke_entries['id'] = poke_entries['pokemon_species.url'].apply(lambda x: x.split('pokemon-species/')[1].split('/')[0])

# 2. Get emerald region locations
locs = get_region_locations('hoenn')

In [None]:
# Import data into duckdb
try:
    con = duckdb.connect()
    con.execute("CREATE TABLE pokedex AS SELECT * FROM poke_entries")
finally:
    con.close()

# Debug

In [3]:
# 3. Get pokemon encounter areas
# poke_encounters = get_pokemon_encounters(id)
data = get_poke_encounters(poke_entries['pokemon_species.name'][197])

In [5]:
locs

Unnamed: 0,name,url
0,petalburg-city,https://pokeapi.co/api/v2/location/429/
1,slateport-city,https://pokeapi.co/api/v2/location/430/
2,lilycove-city,https://pokeapi.co/api/v2/location/431/
3,mossdeep-city,https://pokeapi.co/api/v2/location/432/
4,sootopolis-city,https://pokeapi.co/api/v2/location/433/
...,...,...
102,terra-cave,https://pokeapi.co/api/v2/location/803/
103,marine-cave,https://pokeapi.co/api/v2/location/804/
104,faraway-island,https://pokeapi.co/api/v2/location/812/
105,hoenn-battle-frontier,https://pokeapi.co/api/v2/location/813/


In [None]:
# Get location encounter data
data = api_request('location-area/429')

In [12]:
print(data.keys())
data['pokemon_encounters']

dict_keys(['encounter_method_rates', 'game_index', 'id', 'location', 'name', 'names', 'pokemon_encounters'])


[{'pokemon': {'name': 'oddish',
   'url': 'https://pokeapi.co/api/v2/pokemon/43/'},
  'version_details': [{'encounter_details': [{'chance': 20,
      'condition_values': [],
      'max_level': 27,
      'method': {'name': 'walk',
       'url': 'https://pokeapi.co/api/v2/encounter-method/1/'},
      'min_level': 27},
     {'chance': 10,
      'condition_values': [],
      'max_level': 29,
      'method': {'name': 'walk',
       'url': 'https://pokeapi.co/api/v2/encounter-method/1/'},
      'min_level': 29}],
    'max_chance': 30,
    'version': {'name': 'ruby',
     'url': 'https://pokeapi.co/api/v2/version/7/'}},
   {'encounter_details': [{'chance': 20,
      'condition_values': [],
      'max_level': 27,
      'method': {'name': 'walk',
       'url': 'https://pokeapi.co/api/v2/encounter-method/1/'},
      'min_level': 27},
     {'chance': 10,
      'condition_values': [],
      'max_level': 29,
      'method': {'name': 'walk',
       'url': 'https://pokeapi.co/api/v2/encounter-method/

In [25]:
norm0 = pd.json_normalize(data['pokemon_encounters'])

In [None]:
# Step 1: Normalize the top-level 'pokemon_encounters'
top_level = pd.json_normalize(
    data['pokemon_encounters'], 
    meta=['pokemon.name', 'pokemon.url'],  # Include 'pokemon' fields
    meta_prefix='pokemon.'  # Add prefix to avoid name conflicts
)

# Step 2: Normalize the nested 'version_details' within 'pokemon_encounters'
version_details = pd.json_normalize(
    data['pokemon_encounters'], 
    record_path='version_details',  # Drill into 'version_details'
    meta=['name', 'url'],  # Retain related 'pokemon' fields
    meta_prefix='pokemon.', 
    record_prefix='version_detail.',  # Prefix for version details
    errors='ignore'
)


In [29]:
pd.json_normalize(
    norm0,
    record_path='version_details',
    meta=['pokemon.name', 'pokemon.url'],
    meta_prefix='pokemon.',
    record_prefix='version_detail.'
)

TypeError: string indices must be integers, not 'str'

In [23]:
pd.json_normalize(
    data['pokemon_encounters'], 
    record_path='version_details',  # Normalize the nested 'version_details'
    meta=['name', 'url'],  # Include parent keys in the result
    meta_prefix='pokemon.',  # Optional: Add a prefix to meta columns
    record_prefix='version_detail.'  # Optional: Add a prefix to record columns
)

KeyError: "Key 'name' not found. To replace missing values of 'name' with np.nan, pass in errors='ignore'"

In [6]:
poke_entries

Unnamed: 0,entry_number,pokemon_species.name,pokemon_species.url,id
0,1,treecko,https://pokeapi.co/api/v2/pokemon-species/252/,252
1,2,grovyle,https://pokeapi.co/api/v2/pokemon-species/253/,253
2,3,sceptile,https://pokeapi.co/api/v2/pokemon-species/254/,254
3,4,torchic,https://pokeapi.co/api/v2/pokemon-species/255/,255
4,5,combusken,https://pokeapi.co/api/v2/pokemon-species/256/,256
...,...,...,...,...
197,198,kyogre,https://pokeapi.co/api/v2/pokemon-species/382/,382
198,199,groudon,https://pokeapi.co/api/v2/pokemon-species/383/,383
199,200,rayquaza,https://pokeapi.co/api/v2/pokemon-species/384/,384
200,201,jirachi,https://pokeapi.co/api/v2/pokemon-species/385/,385
