In [1]:
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd

In [2]:
dex_url = "https://pokemondb.net/pokedex/all"
re = requests.get(dex_url)
re.raise_for_status()
raw_dex_data = bs(re.text, 'html.parser')

pokemon = []
pokemon_order = {}
tables = raw_dex_data.find_all('a', attrs={'class':'ent-name'})
count = 0
for link in tables:
    if link.text not in pokemon:
        pokemon.append(link.text)
        pokemon_order[link.text] = count
        count+=1

In [3]:
pokemon_url = {}
for pkmn in pokemon:
    pokemon_url[pkmn] = ('https://pokemondb.net/pokedex/' + pkmn)

pokemon_url['Pikachu']

'https://pokemondb.net/pokedex/Pikachu'

In [4]:
# This cell takes a very long time, it scrapes every pokedex entry
raw_pokemon_data = {}
for pkmn in pokemon_url:
    try:
        re = requests.get(pokemon_url[pkmn])
        re.raise_for_status()
        raw_pokemon_data[pkmn] = (bs(re.text, 'html.parser'))
    except:
        pass
    
len(raw_pokemon_data)

885

In [5]:
raw_pokemon_stats = {}
headers = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']
for pkmn in raw_pokemon_data:
    raw_pokemon_tables = raw_pokemon_data[pkmn].find_all('table')
    for table in raw_pokemon_tables:
        correct_table = True
        for header in headers:
            if header not in str(table):
                correct_table = False
        if correct_table and pkmn not in raw_pokemon_stats:
            raw_pokemon_stats[pkmn] = table
        
raw_pokemon_stats['Pikachu']

<table class="vitals-table">
<tbody>
<tr>
<th>HP</th>
<td class="cell-num">35</td>
<td class="cell-barchart">
<div class="barchart-bar barchart-rank-2" style="width:19.44%;"></div>
</td>
<td class="cell-num">180</td>
<td class="cell-num">274</td>
</tr>
<tr>
<th>Attack</th>
<td class="cell-num">55</td>
<td class="cell-barchart">
<div class="barchart-bar barchart-rank-2" style="width:30.56%;"></div>
</td>
<td class="cell-num">103</td>
<td class="cell-num">229</td>
</tr>
<tr>
<th>Defense</th>
<td class="cell-num">40</td>
<td class="cell-barchart">
<div class="barchart-bar barchart-rank-2" style="width:22.22%;"></div>
</td>
<td class="cell-num">76</td>
<td class="cell-num">196</td>
</tr>
<tr>
<th>Sp. Atk</th>
<td class="cell-num">50</td>
<td class="cell-barchart">
<div class="barchart-bar barchart-rank-2" style="width:27.78%;"></div>
</td>
<td class="cell-num">94</td>
<td class="cell-num">218</td>
</tr>
<tr>
<th>Sp. Def</th>
<td class="cell-num">50</td>
<td class="cell-barchart">
<div clas

In [6]:
regions = {1: "Kanto", 2: "Johto", 3: "Hoenn", 4: "Sinnoh", 5: "Unova", 6: "Kalos", 7: "Alola", 8:"Galar"}

pokemon_stats = []
for pkmn in raw_pokemon_stats:
    stats = {}
    raw_stats = raw_pokemon_stats[pkmn].find_all('tr')
    stats['Name'] = pkmn
    generation_text = raw_pokemon_data[pkmn].find('p').text
    generation_idx = generation_text.find('Generation')
    generation = int(generation_text[generation_idx + len('Generation ')])
    stats["Region"] = regions[generation]
    pkmn_types = raw_pokemon_data[pkmn].find('table').find_all('td')[1].find_all('a')
    stats['Type 1'] = pkmn_types[0].text
    if len(pkmn_types) > 1:
        stats['Type 2'] = pkmn_types[1].text
    else:
        stats['Type 2'] = 'None'
    for stat in raw_stats:
        stat_name = stat.find('th').get_text()
        stat_num = stat.find('td').get_text()
        if stat_name in headers:
            stats[stat_name] = stat_num
    pokemon_stats.append(stats)

pokemon_stats = sorted(pokemon_stats, key = lambda x: pokemon_order[x['Name']])
pokemon_stats[:3]

[{'Name': 'Bulbasaur',
  'Region': 'Kanto',
  'Type 1': 'Grass',
  'Type 2': 'Poison',
  'HP': '45',
  'Attack': '49',
  'Defense': '49',
  'Sp. Atk': '65',
  'Sp. Def': '65',
  'Speed': '45'},
 {'Name': 'Ivysaur',
  'Region': 'Kanto',
  'Type 1': 'Grass',
  'Type 2': 'Poison',
  'HP': '60',
  'Attack': '62',
  'Defense': '63',
  'Sp. Atk': '80',
  'Sp. Def': '80',
  'Speed': '60'},
 {'Name': 'Venusaur',
  'Region': 'Kanto',
  'Type 1': 'Grass',
  'Type 2': 'Poison',
  'HP': '80',
  'Attack': '82',
  'Defense': '83',
  'Sp. Atk': '100',
  'Sp. Def': '100',
  'Speed': '80'}]

In [7]:
cols = ["Name","Attack","Defense","HP","Region","Sp. Atk","Sp. Def","Speed","Type 1","Type 2"]
df = pd.DataFrame(pokemon_stats)
df = df[cols]
df.to_csv('pokemon_stats.csv')

In [8]:
type_url = "https://pokemondb.net/type"
re = requests.get(type_url)
re.raise_for_status()
raw_type_data = bs(re.text, 'html.parser')

effectiveness = {}
raw_to_numbers = {'normal': 1.0, 'not': 0.5, 'super-effective': 2.0, 'no': 0.0}
table = raw_type_data.find('table')
rows = table.find_all('tr')[1:]
for row in rows:
    cells = row.find_all('td')
    for cell in cells:
        data = cell.attrs['title']
        types, val = data.split(' = ')
        type1, type2 = types.split(" → ")
        val = val.split()[0]
        if type1 not in effectiveness:
            effectiveness[type1] = {}
        effectiveness[type1][type2] = raw_to_numbers[val]
        
effectiveness['Fire']

{'Normal': 1.0,
 'Fire': 0.5,
 'Water': 0.5,
 'Electric': 1.0,
 'Grass': 2.0,
 'Ice': 2.0,
 'Fighting': 1.0,
 'Poison': 1.0,
 'Ground': 1.0,
 'Flying': 1.0,
 'Psychic': 1.0,
 'Bug': 2.0,
 'Rock': 0.5,
 'Ghost': 1.0,
 'Dragon': 0.5,
 'Dark': 1.0,
 'Steel': 2.0,
 'Fairy': 1.0}

In [9]:
df = pd.DataFrame(effectiveness)
df.to_csv('type_effectiveness_stats.csv')