# Pokemon WebScraping Project

This script extracts several features of all 893 pokemon listed at 'https://pokemondb.net/pokedex/national': each pokemon's national number, name, type(s), type-defense effectiveness multipliers, and picture. All scraped pictures are saved in a sub-folder called 'images', while the rest of the scraped data is saved in a CSV file called 'pokemons.csv', which is created at the end of the script. The CSV file and the picture folder will be used in a later project as the database for a mobile/web application that shows new pokemon players the type advantages during pokemon battles through an intuitive UI.

## Creating a sub-folder for the images

In [1]:
import os

# exist_ok=True keeps this cell re-runnable: a plain os.mkdir raises
# FileExistsError when 'images' is already there from an earlier run.
os.makedirs('images', exist_ok=True)

## Making Web Request

In [2]:
import requests

URL = 'https://pokemondb.net/pokedex/national'
# requests applies no timeout by default, so an unresponsive server would
# block this cell indefinitely; 30 seconds is a generous upper bound.
page = requests.get(URL, timeout=30)
# Stop here on a 4xx/5xx answer instead of parsing an error page as if it
# were the pokedex listing.
page.raise_for_status()

## Parsing HTML with BeautifulSoup package

In [3]:
from bs4 import BeautifulSoup

# Parse the downloaded page body with Python's bundled HTML parser.
soup = BeautifulSoup(page.content, 'html.parser')
# Collect every <div class="infocard"> element on the page.
results = soup.find_all("div", class_="infocard")
# Display the markup of the first card to inspect its structure.
results[0].prettify()

'<div class="infocard">\n <span class="infocard-lg-img">\n  <a href="/pokedex/bulbasaur">\n   <span class="img-fixed img-sprite" data-alt="Bulbasaur sprite" data-src="https://img.pokemondb.net/sprites/omega-ruby-alpha-sapphire/dex/normal/bulbasaur.png">\n   </span>\n  </a>\n </span>\n <span class="infocard-lg-data text-muted">\n  <small>\n   #001\n  </small>\n  <br/>\n  <a class="ent-name" href="/pokedex/bulbasaur">\n   Bulbasaur\n  </a>\n  <br/>\n  <small>\n   <a class="itype grass" href="/type/grass">\n    Grass\n   </a>\n   ·\n   <a class="itype poison" href="/type/poison">\n    Poison\n   </a>\n  </small>\n </span>\n</div>\n'

## Multiplier function

This function takes the CSS class string that encodes a multiplier in the HTML and returns the corresponding numeric multiplier.

In [4]:
# Multiplier for each recognised 'type-fx-*' class name.
_MULTIPLIER_BY_CLASS = {
    'type-fx-0': 0,
    'type-fx-25': 0.25,
    'type-fx-50': 0.50,
    'type-fx-200': 2,
    'type-fx-400': 4,
}


def replace_multiplier(string):
    """Return the effectiveness multiplier encoded by a CSS class name.

    Args:
        string: A class name such as 'type-fx-50'.

    Returns:
        0, 0.25, 0.5, 2 or 4 for the five recognised class names, and 1
        for any other value.
    """
    return _MULTIPLIER_BY_CLASS.get(string, 1)

## Scraping images

In [5]:
# Scraping all images into sub-folder 'images'
import io

from PIL import Image

# Make sure the target folder exists even when this cell is run on its own
# or re-run after a previous scrape.
os.makedirs("images", exist_ok=True)

# Single-pass equivalent of the chained str.replace calls: drop characters
# that are awkward in file names and spell out the gender symbols.
fileNameTable = str.maketrans({"'": "", ":": "", " ": "", "♀": "F", "♂": "M"})

for result in results:
    # The file name is the pokemon's display name with the table applied.
    imageName = result.find("a", {"class": "ent-name"}).text.translate(fileNameTable)
    imageURL = result.find("span", {"class": "img-fixed img-sprite"})["data-src"]
    # requests has no default timeout; without one a single stalled download
    # would hang the whole loop.
    response = requests.get(imageURL, timeout=30)
    # Fail loudly on a 4xx/5xx answer rather than handing an error body to PIL.
    response.raise_for_status()
    # response.content is the fully downloaded, content-decoded body, whereas
    # response.raw is the undecoded socket stream; reading it completely also
    # lets the connection go back to the pool.
    with Image.open(io.BytesIO(response.content)) as img:
        img.save(os.path.join("images", imageName + ".png"))

## Scraping Pokemon data

In [6]:
import re
meta = []
mainURL = "https://pokemondb.net"
# Single-pass equivalent of the chained str.replace calls used to clean names.
nameTable = str.maketrans({"'": "", ":": "", " ": "", "♀": "F", "♂": "M"})
# Each type-defense table contributes its first 9 cells, 18 values in total.
CELLS_PER_TABLE = 9

for result in results:
    # Look up the shared elements once instead of re-querying for every field.
    smallTags = result.find_all("small")
    nameLink = result.find("a", {"class": "ent-name"})

    # Extract national number
    pokeNo = smallTags[0].text
    # Extract name
    pokeName = nameLink.text.translate(nameTable)
    # Extract Type (the second type is optional)
    typeLinks = smallTags[1].find_all("a")
    pokeType1 = typeLinks[0].text
    pokeType2 = typeLinks[1].text if len(typeLinks) > 1 else ""

    # Extract Type defense effectiveness from the pokemon's own page
    bindURL = mainURL + nameLink["href"]
    # requests has no default timeout; one stalled page would otherwise hang
    # the whole scrape.
    subPage = requests.get(bindURL, timeout=30)
    # Fail loudly on a 4xx/5xx answer rather than parsing an error page.
    subPage.raise_for_status()
    subSoup = BeautifulSoup(subPage.content, 'html.parser')
    subResults = subSoup.find_all("table", {"class": "type-table type-table-pokedex"})

    # Only the first two tables are read. Table 1 holds Normal..Ground and
    # table 2 holds Flying..Fairy; the second row of each carries the cells
    # whose second CSS class encodes the multiplier.
    multipliers = []
    for table in subResults[:2]:
        cells = table.find_all("tr")[1].find_all("td")[:CELLS_PER_TABLE]
        multipliers.extend(replace_multiplier(cell["class"][1]) for cell in cells)

    # A short row would silently shift values into the wrong type column,
    # so stop with a message that names the offending page.
    if len(multipliers) != 2 * CELLS_PER_TABLE:
        raise ValueError(
            "Unexpected type-defense table layout at {}: got {} cells".format(
                bindURL, len(multipliers)))

    meta.append([pokeNo, pokeName, pokeType1, pokeType2] + multipliers)

## Creating a dataframe

Transform 'meta', a list of per-pokemon lists, into a dataframe so the data is easier to inspect.

In [7]:
import pandas as pd
# Build the table from 'meta': each inner list becomes one row.
# Column labels are assigned separately afterwards.
df = pd.DataFrame.from_records(meta)

In [8]:
# The first four columns identify the pokemon.
identityColumns = ['NationalNumber', 'Pokemon', 'Type_1', 'Type_2']
# The remaining eighteen hold one multiplier per attacking type, in the same
# order in which the values were appended to each row of 'meta'.
typeColumns = [
    'Normal', 'Fire', 'Water', 'Electric', 'Grass', 'Ice',
    'Fighting', 'Poison', 'Ground', 'Flying', 'Psychic', 'Bug',
    'Rock', 'Ghost', 'Dragon', 'Dark', 'Steel', 'Fairy',
]
df.columns = identityColumns + typeColumns
# Display the labelled table.
df

Unnamed: 0,NationalNumber,Pokemon,Type_1,Type_2,Normal,Fire,Water,Electric,Grass,Ice,...,Ground,Flying,Psychic,Bug,Rock,Ghost,Dragon,Dark,Steel,Fairy
0,#001,Bulbasaur,Grass,Poison,1.0,2.0,0.5,0.5,0.25,2.0,...,1.0,2.0,2.0,1.00,1.00,1.0,1.0,1.00,1.0,0.5
1,#002,Ivysaur,Grass,Poison,1.0,2.0,0.5,0.5,0.25,2.0,...,1.0,2.0,2.0,1.00,1.00,1.0,1.0,1.00,1.0,0.5
2,#003,Venusaur,Grass,Poison,1.0,2.0,0.5,0.5,0.25,2.0,...,1.0,2.0,2.0,1.00,1.00,1.0,1.0,1.00,1.0,0.5
3,#004,Charmander,Fire,,1.0,0.5,2.0,1.0,0.50,0.5,...,2.0,1.0,1.0,0.50,2.00,1.0,1.0,1.00,0.5,0.5
4,#005,Charmeleon,Fire,,1.0,0.5,2.0,1.0,0.50,0.5,...,2.0,1.0,1.0,0.50,2.00,1.0,1.0,1.00,0.5,0.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
888,#889,Zamazenta,Fighting,Steel,0.5,2.0,1.0,1.0,0.50,0.5,...,2.0,1.0,1.0,0.25,0.25,1.0,0.5,0.50,0.5,1.0
889,#890,Eternatus,Poison,Dragon,1.0,0.5,0.5,0.5,0.25,2.0,...,2.0,1.0,2.0,0.50,1.00,1.0,2.0,1.00,1.0,1.0
890,#891,Kubfu,Fighting,,1.0,1.0,1.0,1.0,1.00,1.0,...,1.0,2.0,2.0,0.50,0.50,1.0,1.0,0.50,1.0,2.0
891,#892,Urshifu,Fighting,Dark,1.0,1.0,1.0,1.0,1.00,1.0,...,1.0,2.0,0.0,1.00,0.50,0.5,1.0,0.25,1.0,4.0


## Export CSV file

In [9]:
# Write the table to 'pokemons.csv'; index=False leaves the row index out of the file.
df.to_csv('pokemons.csv', index = False)

## Conclusion

This web scraping project is the data-collection step for a later Pokemon web/mobile application designed to help new pokemon trainers become familiar with the game and its type advantages without having to search the web for each individual pokemon. The script can also be re-run to pick up the pokemon of future generations as they are added to the site.