![title](Pokemon.jpg)

### Scraping Pokemon Data & Sprites 
This notebook will cover how to scrape Pokemon data and sprites.
The plan is to create an interactive Pokedex in another project.

##### Part 1. Scraping main page for the urls to each Pokemon page

##### Part 2. Scraping Pokemon pages for base stats and types

##### Part 3. Scraping Pokemon Sprites

In [337]:
import requests
from bs4 import BeautifulSoup 
import pandas as pd
import numpy as np

##### Part 1. Scraping main page for the urls to each Pokemon page

In [338]:
# Fetch the national Pokedex landing page, which links to every individual Pokemon page.
# A timeout is passed because requests otherwise waits indefinitely if the server stalls.
response = requests.get('https://pokemondb.net/pokedex/national', timeout=30)

# Report whether the page came back with HTTP 200 before trying to parse it.
if response.status_code == 200:
    print('Request Successful!')
else:
    print("Error!")

Request Successful!


In [339]:
# Parse the landing page so its tags can be inspected for the links to the
# individual pokemon pages.
soup = BeautifulSoup(markup=response.content, features='html.parser')
# Only the first 1500 characters of the pretty-printed markup are shown.
print(soup.prettify()[0:1500])

<!DOCTYPE html>
<html lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   List of Pokémon (sprites gallery) | Pokémon Database
  </title>
  <link href="https://fonts.gstatic.com" rel="preconnect"/>
  <link href="https://img.pokemondb.net" rel="preconnect"/>
  <link href="/static/css/pokemondb-e4cc99e3c7.css" rel="stylesheet"/>
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <meta content="A simple list of all 890 Pokémon by National Dex number, with images." name="description" property="og:description"/>
  <link href="https://pokemondb.net/pokedex/national" rel="canonical"/>
  <meta content="https://pokemondb.net/pokedex/national" property="og:url"/>
  <meta content="summary" name="twitter:card"/>
  <meta content="List of Pokémon (sprites gallery)" property="og:title"/>
  <link href="/favicon.ico" rel="shortcut icon" type="image/x-icon"/>
  <link href="/apple-touch-icon-precomposed.png" rel="apple-touch-icon-precomposed"/>
  <link href="https://pokemondb

In [340]:
base_url = 'https://pokemondb.net'

# How many Pokemon to keep, counted from the start of the listing.
# 251 covers Generations 1 & 2; raise it if you want more Pokemon.
MAX_POKEMON = 251

# Each Pokemon name on the listing page is an <a class="ent-name"> tag whose href
# is a site-relative path; `limit` stops the search once enough links are found.
pokemon_links = soup.find_all('a', class_="ent-name", href=True, limit=MAX_POKEMON)

# dict mapping each Pokemon's displayed name to the absolute url of its own page
url_dict = {link.get_text(): base_url + link['href'] for link in pokemon_links}

In [341]:
# Spot-check a handful of entries to confirm the name -> url mapping looks right.
for sample_name in ('Bulbasaur', 'Ninetales', 'Weepinbell', 'Gyarados', 'Lugia'):
    print(url_dict[sample_name])

https://pokemondb.net/pokedex/bulbasaur
https://pokemondb.net/pokedex/ninetales
https://pokemondb.net/pokedex/weepinbell
https://pokemondb.net/pokedex/gyarados
https://pokemondb.net/pokedex/lugia


In [342]:
# dict to store the parsed page html for each pokemon, keyed by pokemon name
page_dict = {}

# Download every pokemon page and keep its parsed html in page_dict.
# One Session is shared so the connection to the site is reused across requests.
with requests.Session() as session:
    for name, page_url in url_dict.items():
        page_response = session.get(page_url, timeout=30)
        # Stop on an HTTP error status rather than storing an error page, which
        # would only surface later as a confusing failure while extracting stats.
        page_response.raise_for_status()
        page_dict[name] = BeautifulSoup(page_response.content, 'html.parser')

In [344]:
# Print the first 1500 characters of one pokemon's page so the markup holding
# the base stats and types can be explored.
sample_page_html = page_dict['Charmander'].prettify()
print(sample_page_html[:1500])

<!DOCTYPE html>
<html lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   Charmander Pokédex: stats, moves, evolution &amp; locations | Pokémon Database
  </title>
  <link href="https://fonts.gstatic.com" rel="preconnect"/>
  <link href="https://img.pokemondb.net" rel="preconnect"/>
  <link href="/static/css/pokemondb-e4cc99e3c7.css" rel="stylesheet"/>
  <style>
   .cell-barchart{width:100%;min-width:150px}.barchart-bar{height:.75rem;border-radius:4px;background-color:#a3a3a3;border:1px solid #737373;border-color:rgba(0,0,0,0.15)}.barchart-rank-1{background-color:#f34444}.barchart-rank-2{background-color:#ff7f0f}.barchart-rank-3{background-color:#ffdd57}.barchart-rank-4{background-color:#a0e515}.barchart-rank-5{background-color:#23cd5e}.barchart-rank-6{background-color:#00c2b8}.etymology>dd{margin-bottom:0}
.type-table th,.type-table td{padding:0;border:1px solid #f0f0f0}.cell-atkdef{font-size:.625rem;font-weight:normal;line-height:1}.type-cell{display:block;width:66px;height:28p

In [345]:
# stats we are interested in (these are the header-cell labels searched for on each page)
stat_list = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Total']

# dict to store pokemon name along with base stats
data_dict = {}

# loop through pokemon page html and search for the base stats, save to data_dict
for key, value in page_dict.items():

    stat_dict = {}

    for stat in stat_list:
        # `string=` is the current name of the deprecated `text=` filter.
        stat_header = value.find('th', string=stat)
        # The value sits in the first <td> after the header cell; looking the tag up
        # by name does not depend on how many text nodes sit between the two cells.
        stat_cell = stat_header.find_next_sibling('td') if stat_header is not None else None
        if stat_cell is None:
            raise ValueError("Could not find the '{}' stat on the page for {}".format(stat, key))
        # Values are kept as the strings found on the page.
        stat_dict[stat] = stat_cell.text

    data_dict[key] = stat_dict

In [346]:
# Spot-check the scraped base stats for a few pokemon.
# The label is built from the lookup key itself so the printed name always
# matches the entry being shown.
for sample_name in ('Bulbasaur', 'Ninetales', 'Weepinbell', 'Gyarados', 'Lugia'):
    print(sample_name + " :" + str(data_dict[sample_name]))

Bulbasar :{'HP': '45', 'Attack': '49', 'Defense': '49', 'Sp. Atk': '65', 'Sp. Def': '65', 'Speed': '45', 'Total': '318'}
Ninetales :{'HP': '73', 'Attack': '76', 'Defense': '75', 'Sp. Atk': '81', 'Sp. Def': '100', 'Speed': '100', 'Total': '505'}
Weepinbell :{'HP': '65', 'Attack': '90', 'Defense': '50', 'Sp. Atk': '85', 'Sp. Def': '45', 'Speed': '55', 'Total': '390'}
Gyarados :{'HP': '95', 'Attack': '125', 'Defense': '79', 'Sp. Atk': '60', 'Sp. Def': '100', 'Speed': '81', 'Total': '540'}
Lugia :{'HP': '106', 'Attack': '90', 'Defense': '130', 'Sp. Atk': '90', 'Sp. Def': '154', 'Speed': '110', 'Total': '680'}


In [347]:
# store as a dataframe: data_dict's outer keys become columns, so transpose to get
# one row per pokemon and one column per stat
# The scraped values are strings; cast them to integers so the stats behave as
# numbers (sorting, comparisons, arithmetic) instead of text.
df = pd.DataFrame(data_dict).transpose().astype(int)
df.head(15)

Unnamed: 0,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Total
Bulbasaur,45,49,49,65,65,45,318
Ivysaur,60,62,63,80,80,60,405
Venusaur,80,82,83,100,100,80,525
Charmander,39,52,43,60,50,65,309
Charmeleon,58,64,58,80,65,80,405
Charizard,78,84,78,109,85,100,534
Squirtle,44,48,65,50,64,43,314
Wartortle,59,63,80,65,80,58,405
Blastoise,79,83,100,85,105,78,530
Caterpie,45,30,35,20,20,45,195


##### Part 2. Scraping Pokemon pages for base stats and types

In [357]:
# dict to store type data: pokemon name -> [first type, second type or NaN]
type_dict = {}

# loop through page_dict to get type data, store in type_dict
for key, value in page_dict.items():
    # Only the first two type links on the page are used, so search once and stop at two.
    type_links = value.find_all('a', class_='itype', limit=2)
    if not type_links:
        raise ValueError("No type links found on the page for {}".format(key))

    first_type = type_links[0].get_text()
    # Fall back to NaN when there is only one type link. This is checked explicitly
    # rather than with a bare `except`, which would also hide unrelated errors.
    second_type = type_links[1].get_text() if len(type_links) > 1 else np.nan
    type_dict[key] = [first_type, second_type]
        

In [358]:
# Turn type_dict into a frame with one row per pokemon and name the two
# positional columns Type1 / Type2.
df1 = (
    pd.DataFrame(type_dict)
    .T
    .rename(columns={0: 'Type1', 1: 'Type2'})
)
df1.head(15)

Unnamed: 0,Type1,Type2
Bulbasaur,Grass,Poison
Ivysaur,Grass,Poison
Venusaur,Grass,Poison
Charmander,Fire,Fire
Charmeleon,Fire,Fire
Charizard,Fire,Flying
Squirtle,Water,Water
Wartortle,Water,Water
Blastoise,Water,Water
Caterpie,Bug,Bug


In [363]:
# merge base stat and type dataframes
# Both frames are indexed by pokemon name, so join on that index (matching rows by
# name rather than by position) and then turn the index into a regular 'Name' column.
final_df = df.join(df1, how='left').rename_axis('Name').reset_index()

# Where the scraped second type simply repeats the first, blank it out.
# A single .loc[rows, column] assignment is used because chained assignment
# (frame['col'].loc[mask] = ...) may write to a temporary copy and leave the
# frame unchanged.
same_type = final_df['Type1'] == final_df['Type2']
final_df.loc[same_type, 'Type2'] = np.nan
final_df.head(15)

Unnamed: 0,Name,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Total,Type1,Type2
0,Bulbasaur,45,49,49,65,65,45,318,Grass,Poison
1,Ivysaur,60,62,63,80,80,60,405,Grass,Poison
2,Venusaur,80,82,83,100,100,80,525,Grass,Poison
3,Charmander,39,52,43,60,50,65,309,Fire,
4,Charmeleon,58,64,58,80,65,80,405,Fire,
5,Charizard,78,84,78,109,85,100,534,Fire,Flying
6,Squirtle,44,48,65,50,64,43,314,Water,
7,Wartortle,59,63,80,65,80,58,405,Water,
8,Blastoise,79,83,100,85,105,78,530,Water,
9,Caterpie,45,30,35,20,20,45,195,Bug,


In [361]:
# Write the combined stats/types table to pokemon.csv in the working directory.
final_df.to_csv(path_or_buf='pokemon.csv')

##### Part 3. Scraping Pokemon Sprites

In [323]:
# list of pokemon url slugs using the dictionary from step 1
# The slug is the last path segment of each pokemon's page url. For simple names it
# equals the lower-cased name; lower-casing alone does not produce a usable url for
# names containing spaces or punctuation.
# NOTE(review): assumes the sprite files are named with the same slug as the page url - confirm.
pokemon_list = [page_url.rstrip('/').rsplit('/', 1)[-1] for page_url in url_dict.values()]

# base url to which the pokemon slug and a .png extension are appended
base_sprite_url = 'https://img.pokemondb.net/sprites/bank/normal/'

# loop through all pokemon we want and save the images
missing_sprites = []
for pokemon in pokemon_list:

    sprite_request = requests.get(base_sprite_url + pokemon + '.png', timeout=30)

    if sprite_request.status_code == 200:
        with open(pokemon + ".png", 'wb') as f:
            f.write(sprite_request.content)
    else:
        # Remember failures so they are reported instead of being skipped silently.
        missing_sprites.append(pokemon)

if missing_sprites:
    print('No sprite downloaded for: ' + ', '.join(missing_sprites))