In [2]:
import ast

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Download the evolution chart and parse it.
# The timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() fails loudly instead of parsing an HTTP error page.
response = requests.get("https://pokemondb.net/evolution", timeout=30)
response.raise_for_status()
page = BeautifulSoup(response.text, "html.parser")

In [5]:
def get_list_evo(block):
    """Return every 'infocard-list-evo' div that is a direct child of ``block``.

    Each such div is a single evolution tree; one block may hold several of
    them. Only immediate children are searched (``recursive=False``), so divs
    nested deeper inside the block are not returned.
    """
    direct_trees = block.find_all(
        "div",
        class_="infocard-list-evo",
        recursive=False,
    )
    return direct_trees

def get_evolution_paths(tree):
    """Collect the evolution path(s) of one evolution tree.

    ``tree`` is a single 'infocard-list-evo' div. Its direct children are
    walked in document order:

    * a child whose only class is 'infocard' is one node and is appended to
      the current path (children with extra classes are skipped);
    * a child whose first class is 'infocard-evo-split' forks the tree: every
      'infocard-list-evo' directly inside it is resolved recursively and
      prefixed with the nodes collected so far.

    Returns:
        A flat list of node dicts when the tree never splits, or a list of
        such lists (one per branch) when it does. Callers must tell the two
        shapes apart.
    """
    path = []

    for child in tree.find_all(recursive=False):
        # Covers both a missing class attribute and an empty class list,
        # which would otherwise fail on the [0] lookup below.
        classes = child.get('class')
        if not classes:
            continue

        if classes[0] == 'infocard' and len(classes) == 1:
            path.append(extract_id_form(child))

        if classes[0] == 'infocard-evo-split':
            branches = []
            for split in get_list_evo(child):
                sub_paths = get_evolution_paths(split)
                if sub_paths and isinstance(sub_paths[0], list):
                    # The branch itself split again: prefix each of its
                    # paths instead of splicing lists in among the nodes.
                    branches.extend(path + sub_path for sub_path in sub_paths)
                else:
                    branches.append(path + sub_paths)

            path = branches

    return path

def extract_id_form(tag):
    """Read the id and regional form from one 'infocard' div.

    An infocard is a single node in an evolution path. The first two
    ``<small>`` elements are inspected: the first carries the number text,
    the second is searched for a regional-form name.

    Returns:
        A dict with keys 'id' and 'regional_form'. Both are ``None`` when the
        number is 801 or higher (presumably beyond the companion dataset;
        confirm). Otherwise 'id' is the number text minus its first two
        characters (presumably '#' plus one leading zero; confirm against the
        page markup) and 'regional_form' is 'Alolan', 'Galarian', 'Hisuian',
        'Paldean' or 'Default'.
    """
    smalls = tag.find_all("small", limit=2)
    number_text = smalls[0].text

    # The first character is skipped before the numeric comparison.
    if not int(number_text[1:]) < 801:
        return {'id': None, 'regional_form': None}

    label = smalls[1].text
    # First matching name wins, in the same order as the original chain.
    form = next(
        (name for name in ("Alolan", "Galarian", "Hisuian", "Paldean") if name in label),
        "Default",
    )
    return {'id': number_text[2:], 'regional_form': form}

# Build one row per evolution path, holding up to three stages (id + form each).
EVO_COLUMNS = ["evo_1_id", "evo_1_form", "evo_2_id", "evo_2_form", "evo_3_id", "evo_3_form"]

evolution_set = page.find_all("div", class_="infocard-filter-block")

# Rows are gathered in a plain list and turned into a DataFrame once at the
# end; growing a frame with pd.concat inside the loop is quadratic.
evo_records = []

for evolution_block in evolution_set:
    for evolution in get_list_evo(evolution_block):
        paths = get_evolution_paths(evolution)
        if not paths:
            # A tree without any recognised node has nothing to record.
            continue

        # A tree without splits comes back as a flat list of node dicts;
        # wrap it so both shapes are handled as a list of paths.
        paths = [paths] if "id" in paths[0] else paths

        for path in paths:
            # Fresh dict per path: reusing one across branches would leak
            # the later stages of a longer branch into a shorter one.
            row = {}
            for stage, node in enumerate(path[:3], start=1):
                if not isinstance(node, dict):
                    # Malformed entry: keep the stages collected so far.
                    break
                row[f"evo_{stage}_id"] = node['id']
                row[f"evo_{stage}_form"] = node['regional_form']
            evo_records.append(row)

# Stages a path does not reach are left missing in the resulting frame.
evo_paths = pd.DataFrame(evo_records, columns=EVO_COLUMNS)

evo_paths

Unnamed: 0,evo_1_id,evo_1_form,evo_2_id,evo_2_form,evo_3_id,evo_3_form
0,001,Default,002,Default,003,Default
1,004,Default,005,Default,006,Default
2,007,Default,008,Default,009,Default
3,010,Default,011,Default,012,Default
4,013,Default,014,Default,015,Default
...,...,...,...,...,...,...
397,,,,,,
398,,,,,,
399,,,,,,
400,,,,,,


In [9]:
# Load the Pokemon stats table from 'pokemon.csv' (path is relative to the
# notebook's working directory) into `df`.
df = pd.read_csv('pokemon.csv')
    

In [21]:
# Split the 'abilities' column into two columns.
# Each cell holds a stringified Python list, e.g. "['Overgrow', 'Chlorophyll']".
abilities = df['abilities']

# ast.literal_eval only accepts Python literals, so nothing in the CSV can run
# as code (which eval() would allow).
parsed_abilities = [ast.literal_eval(a) for a in abilities]

# Only the first two abilities are kept; missing ones become None.
ability_1 = [a_list[0] if a_list else None for a_list in parsed_abilities]
ability_2 = [a_list[1] if len(a_list) > 1 else None for a_list in parsed_abilities]

df['ability_1'] = ability_1
df['ability_2'] = ability_2



Unnamed: 0,abilities,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,...,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary,ability_1,ability_2
0,"['Overgrow', 'Chlorophyll']",1.00,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,65,65,45,grass,poison,6.9,1,0,Overgrow,Chlorophyll
1,"['Overgrow', 'Chlorophyll']",1.00,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,80,80,60,grass,poison,13.0,1,0,Overgrow,Chlorophyll
2,"['Overgrow', 'Chlorophyll']",1.00,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,122,120,80,grass,poison,100.0,1,0,Overgrow,Chlorophyll
3,"['Blaze', 'Solar Power']",0.50,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0,...,60,50,65,fire,,8.5,1,0,Blaze,Solar Power
4,"['Blaze', 'Solar Power']",0.50,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0,...,80,65,80,fire,,19.0,1,0,Blaze,Solar Power
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
796,['Beast Boost'],0.25,1.0,0.5,2.0,0.5,1.0,2.0,0.5,1.0,...,107,101,61,steel,flying,999.9,7,1,Beast Boost,
797,['Beast Boost'],1.00,1.0,0.5,0.5,0.5,2.0,4.0,1.0,1.0,...,59,31,109,grass,steel,0.1,7,1,Beast Boost,
798,['Beast Boost'],2.00,0.5,2.0,0.5,4.0,2.0,0.5,1.0,0.5,...,97,53,43,dark,dragon,888.0,7,1,Beast Boost,
799,['Prism Armor'],2.00,2.0,1.0,1.0,1.0,0.5,1.0,1.0,2.0,...,127,89,79,psychic,,230.0,7,1,Prism Armor,
