In [28]:
#Temtem scraper
from bs4 import BeautifulSoup
import sys
import pandas as pd
import requests
import re
import numpy as np

# Base URL of the Temtem wiki and the path of the species overview page
main_page = 'https://temtem.gamepedia.com'
species = '/Temtem_Species'
# Used to discard link texts that are raw URLs. The pattern is the literal
# substring 'http': the earlier 'http*' was glob-style and, read as a regex,
# meant "htt" followed by any number of "p", so it also matched any text that
# merely contained "htt".
http_regex = re.compile('http')

In [29]:
def get_request(main_page, tail, timeout=30):
    """Fetch ``main_page + tail`` and return the page parsed with BeautifulSoup.

    Parameters
    ----------
    main_page : str
        Base URL of the site.
    tail : str
        Path appended verbatim to ``main_page``.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout ``requests.get`` can block indefinitely.

    Returns
    -------
    bs4.BeautifulSoup
        The response body parsed with the ``'html.parser'`` backend.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status, so that an error page
        is never parsed as if it were real content.
    requests.RequestException
        On connection problems or when ``timeout`` expires.
    """
    rp = requests.get(main_page + tail, timeout=timeout)
    # Fail loudly on error responses instead of scraping an error page
    rp.raise_for_status()
    # BeautifulSoup content generation
    return BeautifulSoup(rp.content, 'html.parser')

def get_header(all_tr):
    """Build the list of column names from the table rows.

    The ``th`` cells of the second row (``all_tr[1]``) supply the labels.
    The first two labels are renamed to ``'Type_1'`` / ``'Type_2'`` and the
    last one to ``'Total'``; ``'Number'`` and ``'Name'`` are put in front.
    """
    labels = [cell.get_text() for cell in all_tr[1].findAll('th')]
    # Rename the ambiguous labels so every column has a distinct, clear name
    labels[0] = 'Type_1'
    labels[1] = 'Type_2'
    labels[-1] = 'Total'  # the last column holds the sum of the stats
    return ['Number', 'Name'] + labels

def get_all_stats(all_tr, header):
    """Collect the per-Temtem cell values from the table rows.

    Parameters
    ----------
    all_tr : sequence
        Table rows; the first two are skipped and only ``td`` cells of the
        remaining rows are read.
    header : list
        Column names; only its length is used, to detect short rows.

    Returns
    -------
    dict
        Maps the text of each row's first cell to the list of that row's
        cell texts (trailing whitespace removed).

    Notes
    -----
    Rows without any ``td`` cell are skipped. Previously such a row became
    ``[None]`` after the padding step and was stored under the bogus key
    ``'None'``.
    """
    lheader = len(header)
    dict_tem = {}
    for tm in all_tr[2:]:
        stats = [cell.get_text().rstrip() for cell in tm.findAll('td')]
        if not stats:
            # Nothing to record for a row that has no data cells
            continue
        if len(stats) < lheader:
            # Pad position 3 (the 'Type_2' column in the header built by
            # get_header) -- presumably single-type rows omit that cell.
            stats.insert(3, None)
        dict_tem[str(stats[0])] = stats
    return dict_tem

def get_dataframe(header, dict_temtems):
    """Turn the scraped rows into a DataFrame indexed by their first field.

    Each value of ``dict_temtems`` becomes one row. The first field of every
    row is moved into the index, and the remaining columns are labelled with
    ``header[1:]``.
    """
    rows = list(dict_temtems.values())
    # Column 0 holds the first field of each row; promote it to the index
    frame = pd.DataFrame(rows).set_index(0)
    frame.columns = header[1:]
    return frame

def get_dict_traits(df):
    """Add a ``'traits'`` column holding the link texts scraped per Temtem.

    For every entry of ``df.Name`` the page ``main_page + '/' + name`` is
    downloaded, and the texts of the ``<a>`` tags inside its
    ``infobox-table`` table are collected, leaving out navigation arrows,
    boilerplate entries, raw URLs and the names of other Temtems.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a ``Name`` column of strings. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now with a ``'traits'`` column containing
        one list of strings per row.

    Notes
    -----
    One HTTP request is made per row. Results are gathered in a list
    aligned with the rows rather than in a dict keyed by name, so repeated
    names cannot shrink the result and break the column assignment.
    A page without an infobox table yields an empty list.
    """
    # Link texts that are navigation / boilerplate rather than traits;
    # 'None' is what str() gives for anchors with no single string child.
    ignored = {'None', '⮜', '⮞', 'FreeTem!'}
    name_list = [i.strip() for i in df.Name.to_list()]
    known_names = set(name_list)  # constant-time lookup inside the filter

    traits_per_row = []
    for name in name_list:
        table = get_request(main_page, '/' + name).find('table', {'class': 'infobox-table'})
        if table is None:
            # No infobox on this page: keep the row, with no traits
            traits_per_row.append([])
            continue
        # Store plain str values rather than nodes of the parsed document
        texts = (str(a.string) for a in table.findAll('a'))
        traits_per_row.append([
            text for text in texts
            if text not in ignored
            and not http_regex.search(text)
            and text not in known_names
        ])
    df['traits'] = traits_per_row
    return df

In [30]:
# Download and parse the species overview page
soup = get_request(main_page, species)

# Extracting the table
# NOTE(review): index 1 presumes the stats table is the second <table> on the
# page -- confirm if the wiki layout changes.
temtem_table = soup.findAll('table')[1]
all_tr = temtem_table.findAll('tr')

# Dataframe creation with header and data
header = get_header(all_tr)
dict_temtems = get_all_stats(all_tr, header)
df_tm = get_dataframe(header, dict_temtems)

# Slow step: one extra request per Temtem. get_dict_traits adds the column to
# df_tm itself and returns it, so df_traits and df_tm are the same object.
df_traits = get_dict_traits(df_tm)

# Leave the frame as the last expression so Jupyter renders it as a table
df_traits

           Name   Type_1 Type_2  HP STA SPD ATK  DEF SPATK SPDEF Total  \
0                                                                        
2          Oree  Digital   None  61  74  35  65   44    32    31   342   
3       Zaobian  Digital   None  75  90  51  84   50    42    44   436   
7      Platypet    Water  Toxic  55  39  65  45   31    67    56   358   
8        Platox    Water  Toxic  62  44  74  50   35    76    63   404   
9     Platimous    Water  Toxic  71  49  82  56   39    90    70   457   
..          ...      ...    ...  ..  ..  ..  ..  ...   ...   ...   ...   
138      Vulvir     Fire  Earth  59  54  57  47   64    47    31   359   
139       Vulor     Fire  Earth  65  59  63  49   71    49    32   388   
140    Vulcrane     Fire  Earth  76  65  73  74   86    64    35   473   
141     Pigepic     Wind   None  54  72  58  60   72    45    72   433   
161      Anahir  Crystal   Fire  54  36  31  50  101    50   101   423   

                          traits  
0 

In [31]:
# Show only the per-Temtem trait lists
df_traits['traits']

0
2                       [Attack T]
3                       [Attack T]
7      [Toxic Affinity, Amphibian]
8           [Resistant, Resilient]
9                [Zen, Determined]
                  ...             
138     [Camaraderie, Caffeinated]
139    [Pyromaniac, Individualist]
140          [Receptive, Vigorous]
141    [Friendship, Fainted Curse]
161       [Trauma, Flawed Crystal]
Name: traits, Length: 86, dtype: object