# Scraping from the Bulbapedia Pokemon List

### Imports and page download

In [31]:
import requests
import json
from bs4 import BeautifulSoup

# Bulbapedia page listing Pokémon by National Pokédex number
# (the URL is percent-encoded: %C3%A9 is the UTF-8 encoding of "é").
URL="https://bulbapedia.bulbagarden.net/wiki/List_of_Pok%C3%A9mon_by_National_Pok%C3%A9dex_number"
# requests has no default timeout; without one this cell can hang forever
# if the server never answers.
page=requests.get(URL, timeout=30)
# Stop here on a 4xx/5xx response instead of parsing an error page as if it
# were the Pokémon list.
page.raise_for_status()
# Parse the raw response bytes with Python's built-in HTML parser.
soup = BeautifulSoup(page.content, 'html.parser')

### Get the relevant segments

In [3]:
# Narrow the search to the element whose id is 'mw-content-text'.
poke_content=soup.find(id='mw-content-text')
if poke_content is None:
    # find() returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on the next line.
    raise RuntimeError(
        "Element with id 'mw-content-text' not found; "
        "the page layout may have changed or the download failed."
    )

# Every <table> nested inside that element, in document order.
poke_tables=poke_content.find_all('table')
# Layout observed when this notebook was written:
# poke_tables[0]    is the table of contents
# poke_tables[1..8] are the tables containing the list of pokemon per generation
# poke_tables[9]    is the navigation pane at the bottom

### Get the content from the tables

In [13]:
# Build one record per generation:
#   {"generation": <int>, "dex": [<one dict per pokemon row>, ...]}
national_dex = []

# Index of the first pokemon row inside table.contents. The loop below steps
# by 2 from here; presumably the skipped entries are whitespace text nodes
# between rows and the leading entries are the header -- TODO confirm against
# the live page markup.
info_start = 3

# poke_tables[1] through poke_tables[8] hold the per-generation lists, so the
# upper bound of range() must be 9 (exclusive) to include generation 8.
first_gen_table = 1
last_gen_table = 8

for i in range(first_gen_table, last_gen_table + 1):
    table = poke_tables[i]
    gen = {
        "generation": i,  # the table index doubles as the generation number
        "dex": []
    }
    for j in range(info_start, len(table.contents), 2):
        poke_info = table.contents[j]
        # Odd positions of the row's contents carry the cells read here;
        # position 5 is skipped (not used by this notebook).
        entry = {
            "ldex": poke_info.contents[1].text.strip(),  # local pokedex
            "ndex": poke_info.contents[3].text.strip(),
            "name": poke_info.contents[7].text.strip(),
            "type1": poke_info.contents[9].text.strip()
        }
        # Rows with more than 10 content nodes have a second type cell;
        # single-type pokemon simply get no "type2" key.
        if len(poke_info.contents) > 10:
            entry["type2"] = poke_info.contents[11].text.strip()
        gen["dex"].append(entry)
    national_dex.append(gen)


### Write to file

In [14]:
# Persist the scraped data as pretty-printed JSON (4-space indent) in the
# current working directory; an existing file is overwritten.
with open('national_dex.json', 'w') as json_out:
    json.dump(national_dex, json_out, indent=4)

### Sanity Check

In [15]:
# Generation 1 is the first record appended, i.e. index 0 of national_dex.
first_generation = national_dex[0]
print(first_generation)

{'generation': 1, 'dex': [{'ldex': '#001', 'ndex': '#001', 'name': 'Bulbasaur', 'type1': 'Grass', 'type2': 'Poison'}, {'ldex': '#002', 'ndex': '#002', 'name': 'Ivysaur', 'type1': 'Grass', 'type2': 'Poison'}, {'ldex': '#003', 'ndex': '#003', 'name': 'Venusaur', 'type1': 'Grass', 'type2': 'Poison'}, {'ldex': '#004', 'ndex': '#004', 'name': 'Charmander', 'type1': 'Fire'}, {'ldex': '#005', 'ndex': '#005', 'name': 'Charmeleon', 'type1': 'Fire'}, {'ldex': '#006', 'ndex': '#006', 'name': 'Charizard', 'type1': 'Fire', 'type2': 'Flying'}, {'ldex': '#007', 'ndex': '#007', 'name': 'Squirtle', 'type1': 'Water'}, {'ldex': '#008', 'ndex': '#008', 'name': 'Wartortle', 'type1': 'Water'}, {'ldex': '#009', 'ndex': '#009', 'name': 'Blastoise', 'type1': 'Water'}, {'ldex': '#010', 'ndex': '#010', 'name': 'Caterpie', 'type1': 'Bug'}, {'ldex': '#011', 'ndex': '#011', 'name': 'Metapod', 'type1': 'Bug'}, {'ldex': '#012', 'ndex': '#012', 'name': 'Butterfree', 'type1': 'Bug', 'type2': 'Flying'}, {'ldex': '#013',

In [30]:
# Look up the pokemon whose local dex number is #069 in generation 3
# (generation 3 is stored at index 2).
third_generation = national_dex[2]
print("Generation ", third_generation["generation"])
poke = [entry for entry in third_generation["dex"] if entry['ldex'] == "#069"]
print(poke)

Generation  3
[{'ldex': '#069', 'ndex': '#303', 'name': 'Mawile', 'type1': 'Steel', 'type2': 'Fairy'}]
