# Fonctions Projet Wagon

## Fonction DF Stats from Nintendo

In [18]:
# Enable IPython's autoreload extension in mode 2 so that edits made to
# imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [24]:
import requests
from bs4 import BeautifulSoup
import time
import pandas as pd
from pokemon_card_generator.data.pokemon_list import pokemon_list
import pickle

In [27]:
def stats_from_pkm_list(pokemon_list: list,
                        df: bool = False,
                        parser: str = "html.parser",
                        x: float = 0.5,
                        verbose: bool = True):
    """Scrape the Pokédex attribute panel for every name in ``pokemon_list``.

    For each name the page ``https://www.pokemon.com/us/pokedex/<slug>`` is
    downloaded, and the text of its first five ``attribute-title`` /
    ``attribute-value`` ``<span>`` pairs is stored (in the notebook's output
    these are Height, Weight, Gender, Category and Abilities).

    :param pokemon_list: List of Pokémon names (strings) to look up.
    :param df: If True, return a pandas DataFrame indexed by ``Name``;
        otherwise return the raw ``{name: {title: value}}`` dictionary.
    :param parser: Parser handed to BeautifulSoup: "html.parser", "lxml" or
        "html5lib". The latter two must be installed separately
        (example: pip install html5lib).
    :param x: Delay in seconds between two consecutive requests. Warning: a
        delay that is too short creates a risk of being banned.
    :param verbose: If True, print one progress line per requested URL.
    :return: A dict mapping each name to its scraped attributes (possibly an
        empty dict when the page exposed none), or, when ``df`` is True, a
        DataFrame with one row per name that yielded at least one attribute.
    """
    uri = "https://www.pokemon.com/us/pokedex/"

    # Names whose Pokédex URL slug differs from the name itself.
    # NOTE(review): the keys must match the exact spelling used in
    # pokemon_list, otherwise the raw name is used as the slug — confirm.
    special_slugs = {
        "Type:_Null": "type-null",
        "Mime_Jr": "mime-jr",
        r"Sirfetch%27d": "sirfetchd",
        "Mr._Rime": "mr-rime",
        "Tapu_Koko": "Tapu-Koko",
        "Tapu_Lele": "Tapu-Lele",
        "Tapu_Bulu": "Tapu-Bulu",
        "Tapu_Fini": "Tapu-Fini",
        r"Farfetch%27d": "Farfetchd",
        "Mr._Mime": "Mr-Mime"
    }

    total = len(pokemon_list)
    stats = {}
    for cpt, name in enumerate(pokemon_list, 1):
        # The two Nidoran forms are recognised by their gender symbol.
        if "♂" in name:
            slug = "nidoran-male"
        elif "♀" in name:
            slug = "nidoran-female"
        else:
            slug = special_slugs.get(name, name)
        url = f"{uri}{slug}"

        response = requests.get(url)
        # Honour the caller's parser choice instead of a hard-coded one.
        soup = BeautifulSoup(response.content, parser)

        # Only the first five title/value pairs of the page are kept.
        titles = soup.find_all("span", class_="attribute-title")[:5]
        values = soup.find_all("span", class_="attribute-value")[:5]

        if verbose:
            print(f"{cpt}/{total}. Scrapping for {url} done !")

        stats[name] = {t.text: v.text for t, v in zip(titles, values)}
        time.sleep(x)

    if not df:
        return stats

    # Names whose page produced no attribute are left out of the frame, so a
    # failed lookup can be detected by its absence from the index. Building
    # the frame from records also works when nothing was scraped at all
    # (the result is then an empty frame with a "Name" index).
    scraped = {name: attrs for name, attrs in stats.items() if attrs}
    return pd.DataFrame(
        list(scraped.values()),
        index=pd.Index(list(scraped), name="Name"),
    )


def missing_list(pokemon_list: list, df: "pd.DataFrame") -> list:
    """Return the names of ``pokemon_list`` that are absent from ``df``'s index.

    :param pokemon_list: Names that were expected.
    :param df: DataFrame whose index holds the names actually present.
    :return: The names not found in the index, in ``pokemon_list`` order.
    """
    # A set gives constant-time lookups instead of rescanning a list of
    # index labels for every single name.
    known_names = set(df.index.to_list())
    return [name for name in pokemon_list if name not in known_names]

In [28]:
# Scrape the attributes of every Pokémon in pokemon_list and collect them
# in a DataFrame.
df_stats = stats_from_pkm_list(
    pokemon_list,
    df=True,
    parser="lxml",
)

1/898. Scrapping for https://www.pokemon.com/us/pokedex/Bulbasaur done !
2/898. Scrapping for https://www.pokemon.com/us/pokedex/Chikorita done !
3/898. Scrapping for https://www.pokemon.com/us/pokedex/Treecko done !
4/898. Scrapping for https://www.pokemon.com/us/pokedex/Turtwig done !
5/898. Scrapping for https://www.pokemon.com/us/pokedex/Victini done !
6/898. Scrapping for https://www.pokemon.com/us/pokedex/Chespin done !
7/898. Scrapping for https://www.pokemon.com/us/pokedex/Rowlet done !
8/898. Scrapping for https://www.pokemon.com/us/pokedex/Grookey done !
9/898. Scrapping for https://www.pokemon.com/us/pokedex/Ivysaur done !
10/898. Scrapping for https://www.pokemon.com/us/pokedex/Bayleef done !
11/898. Scrapping for https://www.pokemon.com/us/pokedex/Grovyle done !
12/898. Scrapping for https://www.pokemon.com/us/pokedex/Grotle done !
13/898. Scrapping for https://www.pokemon.com/us/pokedex/Snivy done !
14/898. Scrapping for https://www.pokemon.com/us/pokedex/Quilladin done !

KeyboardInterrupt: 

In [22]:
# Display the scraped stats table.
df_stats

Unnamed: 0_level_0,Height,Weight,Gender,Category,Abilities
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Farfetch%27d,"2' 07""",33.1 lbs,\n\n\n,Wild Duck,Keen Eye


In [8]:
# Show which names of pokemon_list are absent from the df_stats index.
missing_list(pokemon_list, df_stats)

['Pikachudagger',
 'Nidoran♀',
 'Nidoran♂',
 'Type:Null',
 'MimeJr.',
 "Sirfetch'd",
 'Mr.Rime',
 'TapuKoko',
 'TapuLele',
 'TapuBulu',
 'TapuFini',
 "Farfetch'd",
 'Mr.Mime',
 'Eeveedagger']

In [None]:
# Persist the scraped stats DataFrame to disk as a pickle file.
with open("df_stats_lxml.pkl", mode="wb") as pickle_out:
    pickle.dump(df_stats, pickle_out)