**Project-2:** *Gotta fetch 'em all* - Enriching the dataset

**Libraries**

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
from pandas import json_normalize
import json
import os
from dotenv import load_dotenv
import time

# Importing the dataset and defining our goals

The dataset was obtained from the kaggle website:
[Pokémon dataset](https://www.kaggle.com/datasets/abcsds/pokemon)

In [2]:
# Importing and checking how the dataset looks like

pokemon = pd.read_csv("./data/pokemon.csv")
pokemon = pokemon.set_index('#')
pokemon.head() 

Unnamed: 0_level_0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,Charmander,Fire,,309,39,52,43,60,50,65,1,False


In [6]:
pokemon.tail()

Unnamed: 0_level_0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
719,Diancie,Rock,Fairy,600,50,100,150,100,150,50,6,True
719,DiancieMega Diancie,Rock,Fairy,700,50,160,110,160,110,110,6,True
720,HoopaHoopa Confined,Psychic,Ghost,600,80,110,60,150,130,70,6,True
720,HoopaHoopa Unbound,Psychic,Dark,680,80,160,60,170,130,80,6,True
721,Volcanion,Fire,Water,600,80,110,120,130,90,70,6,True


- As of today, the Pokémon games consist of 8 generations, adding up a total of **#905** creatures. Thus, this dataset from kaggle is outdated. In addition, it is missing some categories that would make for an interesting investigation. 
- Luckily enough, there's an **API** called [PokéAPI](https://pokeapi.co/) that contains huge amounts of information about the pokémon world. I will make use of this API to first add the missing pokémon to my dataset with their respective categories.

## My goal

In [44]:
# To start of, this would be my goal dataset:

dummy_poke = {'Name':['Pokémon_1', 'Pokémon_2', 'Pokémon_3'],
            'Height':['x', 'y', 'z'],
            'Weight':['x', 'y', 'z'],
            'Type 1':['Type_a', 'Type_b', 'Type_c'],
            'Type 1':['Type_a', 'Type_b', 'Type_c'],
            'Total':['x', 'y', 'z'],
            'HP':['x', 'y', 'z'],
            'Attack':['x', 'y', 'z'],
            'Defense':['x', 'y', 'z'],
            'Sp. Atk':['x', 'y', 'z'],
            'Sp. Def':['x', 'y', 'z'],
            'Speed':['x', 'y', 'z'],
            'Habitat':['x', 'y', 'z'],
            'Generation':['1', '2', '3'],
            'Legendary':['True', 'False', 'True'],
}
  
# Create DataFrame
dummy_df = pd.DataFrame(dummy_poke)

dummy_df

Unnamed: 0,Name,Height,Weight,Type 1,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Habitat,Generation,Legendary
0,Pokémon_1,x,x,Type_a,x,x,x,x,x,x,x,x,1,True
1,Pokémon_2,y,y,Type_b,y,y,y,y,y,y,y,y,2,False
2,Pokémon_3,z,z,Type_c,z,z,z,z,z,z,z,z,3,True


# Enriching the dataset with an API

- According to the docs, the PokéAPI does not require authentication and is free and open to use.
- They also removed the rate limit but still encourage the users to limit the number of their requests.
- For the moment I will limit my requests to the missing pokémon of my dataset, since I would also like to request other information from the API.

In [None]:
# Link for requesting pokémon:
# url_api = https://pokeapi.co/api/v2/pokemon/{id or name}/

## Trying one request

I will first try with a single request to see if I get what I need.

In [57]:
response_api = requests.get(f"https://pokeapi.co/api/v2/pokemon/{810}/")
response_api # Request accepted

<Response [200]>

In [58]:
# Looking at the content of the request. We got the first pokémon of the 8th gen:
# response_api.content
# response_api.json()

first_poke = json_normalize(response_api.json())
first_poke # We got lots of info: 129 columns

Unnamed: 0,abilities,base_experience,forms,game_indices,height,held_items,id,is_default,location_area_encounters,moves,...,sprites.versions.generation-vi.x-y.front_shiny,sprites.versions.generation-vi.x-y.front_shiny_female,sprites.versions.generation-vii.icons.front_default,sprites.versions.generation-vii.icons.front_female,sprites.versions.generation-vii.ultra-sun-ultra-moon.front_default,sprites.versions.generation-vii.ultra-sun-ultra-moon.front_female,sprites.versions.generation-vii.ultra-sun-ultra-moon.front_shiny,sprites.versions.generation-vii.ultra-sun-ultra-moon.front_shiny_female,sprites.versions.generation-viii.icons.front_default,sprites.versions.generation-viii.icons.front_female
0,"[{'ability': {'name': 'overgrow', 'url': 'http...",62,"[{'name': 'grookey', 'url': 'https://pokeapi.c...",[],3,[],810,True,https://pokeapi.co/api/v2/pokemon/810/encounters,"[{'move': {'name': 'mega-punch', 'url': 'https...",...,,,,,,,,,https://raw.githubusercontent.com/PokeAPI/spri...,


### Cleaning and accessing flatened information

In [59]:
# Looking at all the columns, I've got many sprites that I'm not interested in.
# first_poke.columns.to_list()

In [60]:
# Dropping sprites columns with regex
# Dropping other unnecessary columns

first_poke = first_poke[first_poke.columns.drop(list(first_poke.filter(regex='sprites.*')))]
first_poke = first_poke.drop(labels=['game_indices', 'held_items', 'is_default', 'past_types', 'species.name', 'species.url', 'order', 'forms'], axis=1)
first_poke.shape

(1, 10)

In [62]:
first_poke

# Will need to extract the 'stats' and types' from the respective columns

Unnamed: 0,abilities,base_experience,height,id,location_area_encounters,moves,name,stats,types,weight
0,"[{'ability': {'name': 'overgrow', 'url': 'http...",62,3,810,https://pokeapi.co/api/v2/pokemon/810/encounters,"[{'move': {'name': 'mega-punch', 'url': 'https...",grookey,"[{'base_stat': 50, 'effort': 0, 'stat': {'name...","[{'slot': 1, 'type': {'name': 'grass', 'url': ...",50


**Accessing stats**

In [75]:
# first_poke['stats'][0]

In [81]:
first_poke['stats'][0][0]['base_stat'] # value of the stat HP -> in a new column 'HP'
first_poke['stats'][0][1]['base_stat'] # value of the stat attack -> in new column 'Attack'
first_poke['stats'][0][2]['base_stat'] # value of the stat defense -> in new column 'Defense'
first_poke['stats'][0][3]['base_stat'] # value of the stat special-attack -> in new column 'Sp. Atk'
first_poke['stats'][0][4]['base_stat'] # value of the stat special-defense -> in new column 'Sp. Def'
first_poke['stats'][0][5]['base_stat'] # value of the stat speed -> in new column 'Speed'

65

**Accessing types**

In [79]:
# first_poke['types'][0]

In [47]:
first_poke['types'][0][0]['type']['name'] # If it has only 1 type -> in new column 'Type 1'
# first_poke['types'][0][1]['type']['name'] # If it had a second type -> in new column 'Type 2'

'grass'

## Looping for many requests

Let's fetch all the missing pokémon:

In [33]:
def fetchMissing(a, b):
    '''This functions receives two pokemon id numbers as integers ('a' and 'b'),
    and returns a dataframe containing the specified pokemon from the ids 'a' to 'b'.
    '''
    missing_pokes = []
    for i in range(a,b+1):
        time.sleep(1)
        print(f"Fetching pokemon with id: {i}")
        response_api = requests.get(f"https://pokeapi.co/api/v2/pokemon/{i}/").json()
        missing_pokes.append(response_api)
    
    return json_normalize(missing_pokes)

In [38]:
# missing_all = fetchMissing(722,905)

In [35]:
missing_all

Unnamed: 0,abilities,base_experience,forms,game_indices,height,held_items,id,is_default,location_area_encounters,moves,...,sprites.versions.generation-vi.x-y.front_shiny,sprites.versions.generation-vi.x-y.front_shiny_female,sprites.versions.generation-vii.icons.front_default,sprites.versions.generation-vii.icons.front_female,sprites.versions.generation-vii.ultra-sun-ultra-moon.front_default,sprites.versions.generation-vii.ultra-sun-ultra-moon.front_female,sprites.versions.generation-vii.ultra-sun-ultra-moon.front_shiny,sprites.versions.generation-vii.ultra-sun-ultra-moon.front_shiny_female,sprites.versions.generation-viii.icons.front_default,sprites.versions.generation-viii.icons.front_female
0,"[{'ability': {'name': 'overgrow', 'url': 'http...",64.0,"[{'name': 'rowlet', 'url': 'https://pokeapi.co...",[],3,[],722,True,https://pokeapi.co/api/v2/pokemon/722/encounters,"[{'move': {'name': 'swords-dance', 'url': 'htt...",...,,,https://raw.githubusercontent.com/PokeAPI/spri...,,https://raw.githubusercontent.com/PokeAPI/spri...,,https://raw.githubusercontent.com/PokeAPI/spri...,,https://raw.githubusercontent.com/PokeAPI/spri...,
1,"[{'ability': {'name': 'overgrow', 'url': 'http...",147.0,"[{'name': 'dartrix', 'url': 'https://pokeapi.c...",[],7,[],723,True,https://pokeapi.co/api/v2/pokemon/723/encounters,"[{'move': {'name': 'swords-dance', 'url': 'htt...",...,,,https://raw.githubusercontent.com/PokeAPI/spri...,,https://raw.githubusercontent.com/PokeAPI/spri...,,https://raw.githubusercontent.com/PokeAPI/spri...,,https://raw.githubusercontent.com/PokeAPI/spri...,
2,"[{'ability': {'name': 'overgrow', 'url': 'http...",265.0,"[{'name': 'decidueye', 'url': 'https://pokeapi...",[],16,[],724,True,https://pokeapi.co/api/v2/pokemon/724/encounters,"[{'move': {'name': 'swords-dance', 'url': 'htt...",...,,,https://raw.githubusercontent.com/PokeAPI/spri...,,https://raw.githubusercontent.com/PokeAPI/spri...,,https://raw.githubusercontent.com/PokeAPI/spri...,,https://raw.githubusercontent.com/PokeAPI/spri...,
3,"[{'ability': {'name': 'blaze', 'url': 'https:/...",64.0,"[{'name': 'litten', 'url': 'https://pokeapi.co...",[],4,[],725,True,https://pokeapi.co/api/v2/pokemon/725/encounters,"[{'move': {'name': 'pay-day', 'url': 'https://...",...,,,https://raw.githubusercontent.com/PokeAPI/spri...,,https://raw.githubusercontent.com/PokeAPI/spri...,,https://raw.githubusercontent.com/PokeAPI/spri...,,https://raw.githubusercontent.com/PokeAPI/spri...,
4,"[{'ability': {'name': 'blaze', 'url': 'https:/...",147.0,"[{'name': 'torracat', 'url': 'https://pokeapi....",[],7,[],726,True,https://pokeapi.co/api/v2/pokemon/726/encounters,"[{'move': {'name': 'pay-day', 'url': 'https://...",...,,,https://raw.githubusercontent.com/PokeAPI/spri...,,https://raw.githubusercontent.com/PokeAPI/spri...,,https://raw.githubusercontent.com/PokeAPI/spri...,,https://raw.githubusercontent.com/PokeAPI/spri...,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
179,"[{'ability': {'name': 'guts', 'url': 'https://...",,"[{'name': 'ursaluna', 'url': 'https://pokeapi....",[],24,[],901,True,https://pokeapi.co/api/v2/pokemon/901/encounters,[],...,,,,,,,,,,
180,"[{'ability': {'name': 'rattled', 'url': 'https...",,"[{'name': 'basculegion-male', 'url': 'https://...",[],30,[],902,True,https://pokeapi.co/api/v2/pokemon/902/encounters,[],...,,,,,,,,,,
181,"[{'ability': {'name': 'pressure', 'url': 'http...",,"[{'name': 'sneasler', 'url': 'https://pokeapi....",[],13,[],903,True,https://pokeapi.co/api/v2/pokemon/903/encounters,[],...,,,,,,,,,,
182,"[{'ability': {'name': 'poison-point', 'url': '...",,"[{'name': 'overqwil', 'url': 'https://pokeapi....",[],25,[],904,True,https://pokeapi.co/api/v2/pokemon/904/encounters,[],...,,,,,,,,,,


In [37]:
# Exporting new dataset of missing pokemon
missing_all.to_csv('./data/pokemon_missing.csv', index = False)

### Cleaning and accesing flatened information

In [94]:
missing_pokes = pd.read_csv("./data/pokemon_missing.csv")

In [None]:
def cleaning(df):

In [95]:
def extractingInfo(df):
    '''
    '''
    df['HP'] = df['stats'][i][0]['base_stat']
    df['Attack'] = df['stats'][i][1]['base_stat']
    df['Defense'] = df['stats'][i][2]['base_stat']
    df['Sp. Atk'] = df['stats'][i][3]['base_stat']
    df['Sp. Def'] = df['stats'][i][4]['base_stat']
    df['Speed'] = df['stats'][i][5]['base_stat']
    
    return df

In [119]:
missing_pokes['stats'][180]

"[{'base_stat': 120, 'effort': 2, 'stat': {'name': 'hp', 'url': 'https://pokeapi.co/api/v2/stat/1/'}}, {'base_stat': 112, 'effort': 0, 'stat': {'name': 'attack', 'url': 'https://pokeapi.co/api/v2/stat/2/'}}, {'base_stat': 65, 'effort': 0, 'stat': {'name': 'defense', 'url': 'https://pokeapi.co/api/v2/stat/3/'}}, {'base_stat': 80, 'effort': 0, 'stat': {'name': 'special-attack', 'url': 'https://pokeapi.co/api/v2/stat/4/'}}, {'base_stat': 75, 'effort': 0, 'stat': {'name': 'special-defense', 'url': 'https://pokeapi.co/api/v2/stat/5/'}}, {'base_stat': 78, 'effort': 0, 'stat': {'name': 'speed', 'url': 'https://pokeapi.co/api/v2/stat/6/'}}]"

In [118]:
first_poke['stats'][0]

[{'base_stat': 50,
  'effort': 0,
  'stat': {'name': 'hp', 'url': 'https://pokeapi.co/api/v2/stat/1/'}},
 {'base_stat': 65,
  'effort': 1,
  'stat': {'name': 'attack', 'url': 'https://pokeapi.co/api/v2/stat/2/'}},
 {'base_stat': 50,
  'effort': 0,
  'stat': {'name': 'defense', 'url': 'https://pokeapi.co/api/v2/stat/3/'}},
 {'base_stat': 40,
  'effort': 0,
  'stat': {'name': 'special-attack',
   'url': 'https://pokeapi.co/api/v2/stat/4/'}},
 {'base_stat': 40,
  'effort': 0,
  'stat': {'name': 'special-defense',
   'url': 'https://pokeapi.co/api/v2/stat/5/'}},
 {'base_stat': 65,
  'effort': 0,
  'stat': {'name': 'speed', 'url': 'https://pokeapi.co/api/v2/stat/6/'}}]

In [86]:
missing_all_newcol = extractingInfo(missing_all)

In [87]:
missing_all_newcol

Unnamed: 0,abilities,base_experience,forms,game_indices,height,held_items,id,is_default,location_area_encounters,moves,...,sprites.versions.generation-vii.ultra-sun-ultra-moon.front_shiny,sprites.versions.generation-vii.ultra-sun-ultra-moon.front_shiny_female,sprites.versions.generation-viii.icons.front_default,sprites.versions.generation-viii.icons.front_female,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed
0,"[{'ability': {'name': 'overgrow', 'url': 'http...",64.0,"[{'name': 'rowlet', 'url': 'https://pokeapi.co...",[],3,[],722,True,https://pokeapi.co/api/v2/pokemon/722/encounters,"[{'move': {'name': 'swords-dance', 'url': 'htt...",...,https://raw.githubusercontent.com/PokeAPI/spri...,,https://raw.githubusercontent.com/PokeAPI/spri...,,68,55,55,50,50,42
1,"[{'ability': {'name': 'overgrow', 'url': 'http...",147.0,"[{'name': 'dartrix', 'url': 'https://pokeapi.c...",[],7,[],723,True,https://pokeapi.co/api/v2/pokemon/723/encounters,"[{'move': {'name': 'swords-dance', 'url': 'htt...",...,https://raw.githubusercontent.com/PokeAPI/spri...,,https://raw.githubusercontent.com/PokeAPI/spri...,,68,55,55,50,50,42
2,"[{'ability': {'name': 'overgrow', 'url': 'http...",265.0,"[{'name': 'decidueye', 'url': 'https://pokeapi...",[],16,[],724,True,https://pokeapi.co/api/v2/pokemon/724/encounters,"[{'move': {'name': 'swords-dance', 'url': 'htt...",...,https://raw.githubusercontent.com/PokeAPI/spri...,,https://raw.githubusercontent.com/PokeAPI/spri...,,68,55,55,50,50,42
3,"[{'ability': {'name': 'blaze', 'url': 'https:/...",64.0,"[{'name': 'litten', 'url': 'https://pokeapi.co...",[],4,[],725,True,https://pokeapi.co/api/v2/pokemon/725/encounters,"[{'move': {'name': 'pay-day', 'url': 'https://...",...,https://raw.githubusercontent.com/PokeAPI/spri...,,https://raw.githubusercontent.com/PokeAPI/spri...,,68,55,55,50,50,42
4,"[{'ability': {'name': 'blaze', 'url': 'https:/...",147.0,"[{'name': 'torracat', 'url': 'https://pokeapi....",[],7,[],726,True,https://pokeapi.co/api/v2/pokemon/726/encounters,"[{'move': {'name': 'pay-day', 'url': 'https://...",...,https://raw.githubusercontent.com/PokeAPI/spri...,,https://raw.githubusercontent.com/PokeAPI/spri...,,68,55,55,50,50,42
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
179,"[{'ability': {'name': 'guts', 'url': 'https://...",,"[{'name': 'ursaluna', 'url': 'https://pokeapi....",[],24,[],901,True,https://pokeapi.co/api/v2/pokemon/901/encounters,[],...,,,,,68,55,55,50,50,42
180,"[{'ability': {'name': 'rattled', 'url': 'https...",,"[{'name': 'basculegion-male', 'url': 'https://...",[],30,[],902,True,https://pokeapi.co/api/v2/pokemon/902/encounters,[],...,,,,,68,55,55,50,50,42
181,"[{'ability': {'name': 'pressure', 'url': 'http...",,"[{'name': 'sneasler', 'url': 'https://pokeapi....",[],13,[],903,True,https://pokeapi.co/api/v2/pokemon/903/encounters,[],...,,,,,68,55,55,50,50,42
182,"[{'ability': {'name': 'poison-point', 'url': '...",,"[{'name': 'overqwil', 'url': 'https://pokeapi....",[],25,[],904,True,https://pokeapi.co/api/v2/pokemon/904/encounters,[],...,,,,,68,55,55,50,50,42


### Debugging

**fetchMissing function**

In [16]:
# When I first tried the fetchMissing function, when reaching pokemon 902 it returned an error message:
# JSONDecodeError: Expecting value: line 1 column 1 (char 0)
# Tried defining a new fetchMissing function but ended up returning a dataframe with no info in it.
# Then tried adding a time.sleep(1) to the original function and it worked just fine.

In [98]:
# def fetchMissing(a, b):
#     '''This functions receives two pokemon id numbers as integers ('a' and 'b'),
#     and returns a dataframe containing the specified pokemon from the ids 'a' to 'b'.
#     '''
#     missing_pokes = []
#     for i in range(a,b+1):
#         response_api = requests.get(f"https://pokeapi.co/api/v2/pokemon/{i}/")
#         try:
#             response_api.json()
#             missing_pokes.append(response_api)
#         except JSONDecodeError:
#             print(f"Encountered a JSONDecodeError for pokemon id {i}")
               
#     return json_normalize(missing_pokes)

# Enriching the dataset using Web Scraping

In [3]:
# Using Wikidex

In [4]:
pokemon_url = "https://www.wikidex.net/wiki/Bulbasaur"
# To get the info of all of them will need to iterate on the url f"{Pokémon}"

In [5]:
response = requests.get(pokemon_url)
response

<Response [200]>

In [6]:
html = response.content
# html

## Weight

In [7]:
soup = BeautifulSoup(html, "html.parser")
# soup

In [8]:
weight = soup.find_all("tr", attrs = {"title" : "Peso del Pokémon"})
weight

[<tr title="Peso del Pokémon">
 <th><a href="/wiki/Lista_de_Pok%C3%A9mon_por_peso" title="Lista de Pokémon por peso">Peso</a>
 </th>
 <td>6,9 kg
 </td></tr>]

In [9]:
# Weight in kg of bulbasur
float(weight[0].getText().strip().split('\n')[-1].split(" ")[0].replace(",","."))

6.9

In [10]:
# Let's check if I can iterate over three pokémon to get their weights into a list

In [11]:
pokemon_fract = pokemon[:3]
pokemon_fract

Unnamed: 0_level_0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False


In [12]:
pokemon_list = pokemon_fract['Name'].to_list()
pokemon_list

['Bulbasaur', 'Ivysaur', 'Venusaur']

In [35]:
def getWeight(list_of_pokes):
    '''This functions appends the weight of each pokemon to a list.
    If the pokemon is not found in the wiki, it appends "NaN" instead.
    '''
    weight_list = []
    
    for i in range(len(list_of_pokes)):
        pokemon_url = f"https://www.wikidex.net/wiki/{list_of_pokes[i]}"
        response = requests.get(pokemon_url)
        html = response.content
        soup = BeautifulSoup(html, "html.parser")
        weight = soup.find_all("tr", attrs = {"title" : "Peso del Pokémon"})
        
        try:
            weight_float = float(weight[0].getText().strip().split('\n')[-1].split(" ")[0].replace(",","."))
            weight_list.append(weight_float)
        except IndexError:
            weight_list.append('NaN')
        
    return weight_list

In [23]:
getWeight(pokemon_list)

https://www.wikidex.net/wiki/Bulbasaur
https://www.wikidex.net/wiki/Ivysaur
https://www.wikidex.net/wiki/Venusaur


[6.9, 13.0, 100.0]

In [20]:
pokemon_fract5 = pokemon[:5]
pokemon_fract5

Unnamed: 0_level_0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,Charmander,Fire,,309,39,52,43,60,50,65,1,False


In [34]:
getWeight(pokemon_fract5['Name'].to_list())

https://www.wikidex.net/wiki/Bulbasaur
https://www.wikidex.net/wiki/Ivysaur
https://www.wikidex.net/wiki/Venusaur
https://www.wikidex.net/wiki/VenusaurMega Venusaur
https://www.wikidex.net/wiki/Charmander


[6.9, 13.0, 100.0, 'NaN', 8.5]

In [37]:
all_pokemon_list = pokemon['Name'].to_list()
# print(all_pokemon_list)

In [49]:
# all_weights = getWeight(all_pokemon_list)
# print(all_weights)

In [54]:
len(all_weights)

800

In [43]:
# Adding new column Weight to the pokedex
pokemon['Weight (kg)'] = all_weights

In [48]:
# Exporting dataset with added weight
pokemon.to_csv('./data/pokemon_weight.csv', index = False)

In [79]:
pokemon_nan_weight = pokemon[pokemon['Weight (kg)'] == 'NaN']['Name'].to_list()

In [80]:
pokemon_nan_weight

['VenusaurMega Venusaur',
 'CharizardMega Charizard X',
 'CharizardMega Charizard Y',
 'BlastoiseMega Blastoise',
 'BeedrillMega Beedrill',
 'PidgeotMega Pidgeot',
 'AlakazamMega Alakazam',
 'SlowbroMega Slowbro',
 'GengarMega Gengar',
 'KangaskhanMega Kangaskhan',
 'PinsirMega Pinsir',
 'GyaradosMega Gyarados',
 'AerodactylMega Aerodactyl',
 'MewtwoMega Mewtwo X',
 'MewtwoMega Mewtwo Y',
 'AmpharosMega Ampharos',
 'SteelixMega Steelix',
 'ScizorMega Scizor',
 'HeracrossMega Heracross',
 'HoundoomMega Houndoom',
 'TyranitarMega Tyranitar',
 'SceptileMega Sceptile',
 'BlazikenMega Blaziken',
 'SwampertMega Swampert',
 'GardevoirMega Gardevoir',
 'SableyeMega Sableye',
 'MawileMega Mawile',
 'AggronMega Aggron',
 'MedichamMega Medicham',
 'ManectricMega Manectric',
 'SharpedoMega Sharpedo',
 'CameruptMega Camerupt',
 'AltariaMega Altaria',
 'BanetteMega Banette',
 'AbsolMega Absol',
 'GlalieMega Glalie',
 'SalamenceMega Salamence',
 'MetagrossMega Metagross',
 'LatiasMega Latias',
 'Lati

In [74]:
count = 0
for i in all_weights:
    if i == 'NaN':
        count += 1

In [75]:
count

95

# Height

In [55]:
soup

<!DOCTYPE html>

<html class="client-nojs" dir="ltr" lang="es">
<head>
<meta charset="utf-8"/>
<title>Bulbasaur - WikiDex, la enciclopedia Pokémon</title>
<script>document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!0,"wgSeparatorTransformTable":[",\t."," \t,"],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","enero","febrero","marzo","abril","mayo","junio","julio","agosto","septiembre","octubre","noviembre","diciembre"],"wgRequestId":"6dafd66f561a896c0b16cadb","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"Bulbasaur","wgTitle":"Bulbasaur","wgCurRevisionId":2800901,"wgRevisionId":2800901,"wgArticleId":2366,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Artículos con enlaces rotos a archivos","Lista de Pokémon por número","Lista de Pokémon por nombre","Pokémon de color verde","Pokémon de tipo planta","Pokémon de tip

In [65]:
def getHeight(list_of_pokes):
    '''This functions appends the height of each pokemon to a list.
    If the pokemon is not found in the wiki, it appends "NaN" instead.
    '''
    height_list = []
    
    for i in range(len(list_of_pokes)):
        pokemon_url = f"https://www.wikidex.net/wiki/{list_of_pokes[i]}"
        response = requests.get(pokemon_url)
        html = response.content
        soup = BeautifulSoup(html, "html.parser")
        height = soup.find_all("tr", attrs = {"title" : "Altura del Pokémon"})
        
        try:
            height_float = float(height[0].getText().strip().split('\n')[-1].split(" ")[0].replace(",","."))
            height_list.append(height_float)
        except IndexError:
            height_list.append('NaN')
        
    return height_list

In [66]:
getHeight(pokemon_fract5['Name'].to_list())

[0.7, 1.0, 2.0, 'NaN', 0.6]

# Bulbapedia ->>> catch rate

In [None]:
# Using bulbapedia

In [None]:
pokemon_url_bulb = "https://bulbapedia.bulbagarden.net/wiki/Bulbasaur_(Pok%C3%A9mon)"

In [None]:
# Execute navigator.userAgent in Chrome developer console, withing the page - Ctrl+Shit+J
# Add that User-Agent into headers

headers = {"User-Agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36"}
response_bulb = requests.get(pokemon_url_2, headers = headers)
response_bulb

In [None]:
html_bulb = response_bulb.content
soup_bulb = BeautifulSoup(html_bulb, "html.parser")
catchrate = soup_bulb.find_all("tr", attrs = {"title" : "Peso del Pokémon"})