In [1]:
# Passo 1 - Importando bibliotecas
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
# Step 2 - Request the page that lists the characters
url = 'https://game8.co/games/Zenless-Zone-Zero/archives/435684#hl_1'
# A timeout keeps this cell from hanging indefinitely if the server never answers.
response = requests.get(url, timeout=30)
# Stop here on a 4xx/5xx answer instead of parsing an error page in the next steps.
response.raise_for_status()
html_content = response.content

In [3]:
# Step 3 - Parse the HTML
# Wrapping the raw HTML in a BeautifulSoup object is what makes find() and
# find_all() available in the following steps.
soup = BeautifulSoup(markup=html_content, features='html.parser')

In [4]:
# Step 4 - Locate the character table and extract its contents
table = soup.find('table', {'class':'a-table a-table a-table table--fixed flexible-cell'})
if table is None:
    # find() returns None when nothing matches; fail with a readable message
    # instead of an AttributeError on the next line.
    raise ValueError('Character table not found: no <table> matches the expected class attribute.')

rows = table.find_all('tr')
if not rows:
    raise ValueError('Character table has no <tr> rows.')

# Column titles: text of the <th> cells in the first row
header = [th.text.strip() for th in rows[0].find_all('th')]

# One list of stripped <td> texts per <tr> (a row made only of <th> yields [])
data = [[td.text.strip() for td in row.find_all('td')] for row in rows]

# The first row is assumed to be the header, so it is skipped; any other row
# without <td> cells is dropped as well so it cannot become an empty record.
characters = [cells for cells in data[1:] if cells]
print(characters)

[['Burnice', 'Fire', 'Anomaly', 'Pierce', 'Sons of Calydon', 'Tier 0', 'S Rank'], ['Caesar', 'Physical', 'Defense', 'Slash', 'Sons of Calydon', 'Tier 0', 'S Rank'], ['Jane', 'Physical', 'Anomaly', 'Slash', 'Criminal Investigation Team', 'Tier 0', 'S Rank'], ['Qingyi', 'Electric', 'Stun', 'Strike', 'Criminal Investigation Team', 'Tier 0', 'S Rank'], ['Zhu Yuan', 'Ether', 'Attack', 'Pierce', 'Criminal Investigation Team', 'Tier 0', 'S Rank'], ['Ellen', 'Ice', 'Attack', 'Slash', 'Victoria Housekeeping', 'Tier 0', 'S Rank'], ['Grace', 'Electric', 'Anomaly', 'Pierce', 'Belobog Heavy Industries', 'Tier 1', 'S Rank'], ['Soldier 11', 'Fire', 'Attack', 'Slash', 'Obol Squad', 'Tier 1', 'S Rank'], ['Nekomata', 'Physical', 'Attack', 'Slash', 'Cunning Hares', 'Tier 2', 'S Rank'], ['Lycaon', 'Ice', 'Stun', 'Strike', 'Victoria Housekeeping', 'Tier 0', 'S Rank'], ['Koleda', 'Fire', 'Stun', 'Strike', 'Belobog Heavy Industries', 'Tier 1', 'S Rank'], ['Rina', 'Electric', 'Support', 'Strike', 'Victoria Ho

In [5]:
# Step 4.5 - Rename the character "Jane" to "Jane Doe"

# Why: one site lists her as "Jane" and the other as "Jane Doe", and that
# mismatch causes a problem in the final result.

for entry in characters:
    # The first cell of each row is the character name.
    if entry[0] != "Jane":
        continue
    entry[0] = "Jane Doe"

print(characters)

[['Burnice', 'Fire', 'Anomaly', 'Pierce', 'Sons of Calydon', 'Tier 0', 'S Rank'], ['Caesar', 'Physical', 'Defense', 'Slash', 'Sons of Calydon', 'Tier 0', 'S Rank'], ['Jane Doe', 'Physical', 'Anomaly', 'Slash', 'Criminal Investigation Team', 'Tier 0', 'S Rank'], ['Qingyi', 'Electric', 'Stun', 'Strike', 'Criminal Investigation Team', 'Tier 0', 'S Rank'], ['Zhu Yuan', 'Ether', 'Attack', 'Pierce', 'Criminal Investigation Team', 'Tier 0', 'S Rank'], ['Ellen', 'Ice', 'Attack', 'Slash', 'Victoria Housekeeping', 'Tier 0', 'S Rank'], ['Grace', 'Electric', 'Anomaly', 'Pierce', 'Belobog Heavy Industries', 'Tier 1', 'S Rank'], ['Soldier 11', 'Fire', 'Attack', 'Slash', 'Obol Squad', 'Tier 1', 'S Rank'], ['Nekomata', 'Physical', 'Attack', 'Slash', 'Cunning Hares', 'Tier 2', 'S Rank'], ['Lycaon', 'Ice', 'Stun', 'Strike', 'Victoria Housekeeping', 'Tier 0', 'S Rank'], ['Koleda', 'Fire', 'Stun', 'Strike', 'Belobog Heavy Industries', 'Tier 1', 'S Rank'], ['Rina', 'Electric', 'Support', 'Strike', 'Victori

In [6]:
# Step 5 - Keep only the columns that matter for each character
# Position of the column to discard: the sixth one (index 5), which holds the
# "Tier N" values in the scraped rows.
dropped_column = 5

# Remove that column from every character row using slicing
filtered_characters = [
    character[:dropped_column] + character[dropped_column + 1:]
    for character in characters
]

# Remove the same column from the header and lowercase the remaining titles
filtered_headers = [
    title.lower()
    for title in header[:dropped_column] + header[dropped_column + 1:]
]

print(filtered_headers)
print(filtered_characters)

['agent', 'attribute', 'specialty', 'type', 'faction', 'rarity']
[['Burnice', 'Fire', 'Anomaly', 'Pierce', 'Sons of Calydon', 'S Rank'], ['Caesar', 'Physical', 'Defense', 'Slash', 'Sons of Calydon', 'S Rank'], ['Jane Doe', 'Physical', 'Anomaly', 'Slash', 'Criminal Investigation Team', 'S Rank'], ['Qingyi', 'Electric', 'Stun', 'Strike', 'Criminal Investigation Team', 'S Rank'], ['Zhu Yuan', 'Ether', 'Attack', 'Pierce', 'Criminal Investigation Team', 'S Rank'], ['Ellen', 'Ice', 'Attack', 'Slash', 'Victoria Housekeeping', 'S Rank'], ['Grace', 'Electric', 'Anomaly', 'Pierce', 'Belobog Heavy Industries', 'S Rank'], ['Soldier 11', 'Fire', 'Attack', 'Slash', 'Obol Squad', 'S Rank'], ['Nekomata', 'Physical', 'Attack', 'Slash', 'Cunning Hares', 'S Rank'], ['Lycaon', 'Ice', 'Stun', 'Strike', 'Victoria Housekeeping', 'S Rank'], ['Koleda', 'Fire', 'Stun', 'Strike', 'Belobog Heavy Industries', 'S Rank'], ['Rina', 'Electric', 'Support', 'Strike', 'Victoria Housekeeping', 'S Rank'], ['Seth', 'Electri

In [7]:
# Step 6 - Build a DataFrame from the filtered rows and headers
df = pd.DataFrame(data=filtered_characters, columns=filtered_headers)
# Show the column index first, then the frame itself, one per line
print(df.columns, df, sep='\n')

Index(['agent', 'attribute', 'specialty', 'type', 'faction', 'rarity'], dtype='object')
         agent attribute specialty    type                      faction  \
0      Burnice      Fire   Anomaly  Pierce              Sons of Calydon   
1       Caesar  Physical   Defense   Slash              Sons of Calydon   
2     Jane Doe  Physical   Anomaly   Slash  Criminal Investigation Team   
3       Qingyi  Electric      Stun  Strike  Criminal Investigation Team   
4     Zhu Yuan     Ether    Attack  Pierce  Criminal Investigation Team   
5        Ellen       Ice    Attack   Slash        Victoria Housekeeping   
6        Grace  Electric   Anomaly  Pierce     Belobog Heavy Industries   
7   Soldier 11      Fire    Attack   Slash                   Obol Squad   
8     Nekomata  Physical    Attack   Slash                Cunning Hares   
9       Lycaon       Ice      Stun  Strike        Victoria Housekeeping   
10      Koleda      Fire      Stun  Strike     Belobog Heavy Industries   
11        Ri

In [8]:
# Step 7 - Export the DataFrame to CSV (the row index is not written).
# Note: a later cell writes the merged table to this same path, replacing
# the file produced here.
df.to_csv(path_or_buf='zzz_characters_data.csv', index=False)

In [9]:
# Step 8 - Sort the DataFrame by character (agent) name
df_ordenado = df.sort_values(by='agent')
print(df_ordenado)

         agent attribute specialty    type                      faction  \
21        Anby  Electric      Stun   Slash                Cunning Hares   
16       Anton  Electric    Attack  Pierce     Belobog Heavy Industries   
17         Ben      Fire   Defense  Strike     Belobog Heavy Industries   
19       Billy  Physical    Attack  Pierce                Cunning Hares   
0      Burnice      Fire   Anomaly  Pierce              Sons of Calydon   
1       Caesar  Physical   Defense   Slash              Sons of Calydon   
15       Corin  Physical    Attack   Slash        Victoria Housekeeping   
5        Ellen       Ice    Attack   Slash        Victoria Housekeeping   
6        Grace  Electric   Anomaly  Pierce     Belobog Heavy Industries   
2     Jane Doe  Physical   Anomaly   Slash  Criminal Investigation Team   
10      Koleda      Fire      Stun  Strike     Belobog Heavy Industries   
14        Lucy      Fire   Support  Strike              Sons of Calydon   
9       Lycaon       Ice 

## Adicionando os base stats de nível 60 dos personagens

In [10]:
# Step 1 - Request the page that lists the character base stats
url = 'https://www.prydwen.gg/zenless/characters-stats/'
# A timeout keeps this cell from hanging indefinitely if the server never answers.
response = requests.get(url, timeout=30)
# Stop here on a 4xx/5xx answer instead of parsing an error page in the next steps.
response.raise_for_status()
html_content = response.content

In [11]:
# Step 2 - Parse the HTML
# Wrapping the raw HTML in a BeautifulSoup object is what makes find() and
# find_all() available in the following steps.
soup = BeautifulSoup(markup=html_content, features='html.parser')

In [12]:
# Step 4 - Locate the stats table and extract its contents
table = soup.find('table', {'class':'stats-table table table-striped table-bordered'})
if table is None:
    # find() returns None when nothing matches; fail with a readable message
    # instead of an AttributeError on the next line.
    raise ValueError('Stats table not found: no <table> matches the expected class attribute.')

rows = table.find_all('tr')
if not rows:
    raise ValueError('Stats table has no <tr> rows.')

# Column titles: text of the <th> cells in the first row
header = [th.text.strip() for th in rows[0].find_all('th')]

# One list of stripped <td> texts per <tr> (a row made only of <th> yields [])
data = [[td.text.strip() for td in row.find_all('td')] for row in rows]

# The first row is assumed to be the header, so it is skipped; any other row
# without <td> cells is dropped as well so it cannot become an empty record.
base_stats = [cells for cells in data[1:] if cells]
print(base_stats)

[['Nicole', '8146', '623', '649', '5.00%', '50.00%', '0%', '88', '93', '1.56'], ['Ben', '8578', '724', '653', '5.00%', '50.00%', '0%', '95', '90', '1.56'], ['Anby', '7501', '613', '659', '5.00%', '50.00%', '0%', '136', '93', '1.2'], ['Lucy', '8026', '613', '659', '5.00%', '50.00%', '0%', '86', '93', '1.56'], ['Soukaku', '8026', '598', '666', '5.00%', '50.00%', '0%', '86', '96', '1.56'], ['Rina', '8609', '601', '717', '5.00%', '50.00%', '14%', '83', '92', '1.2'], ['Lycaon', '8416', '607', '729', '5.00%', '50.00%', '0%', '137', '90', '1.2'], ['Koleda', '8127', '595', '736', '5.00%', '50.00%', '0%', '134', '96', '1.2'], ['Piper', '6977', '613', '758', '5.00%', '50.00%', '0%', '86', '118', '1.56'], ['Billy', '6907', '607', '787', '19.40%', '50.00%', '0%', '91', '91', '1.2'], ['Anton', '7219', '623', '792', '19.40%', '50.00%', '0%', '95', '90', '1.2'], ['Corin', '6977', '605', '807', '5.00%', '78.80%', '0%', '93', '96', '1.2'], ['Grace', '7483', '601', '826', '5.00%', '50.00%', '0%', '83', 

In [13]:
# Step 5 - Normalize the header titles
# Lowercase every title first
header_lower = list(map(str.lower, header))
# Then, in order: spaces become underscores, dots are removed, and the
# substring 'name' becomes 'agent' (the key shared with the other table).
header_corrigido = [
    title.replace(' ', '_').replace('.', '').replace('name', 'agent')
    for title in header_lower
]
print(header_corrigido)

['agent', 'hp', 'def', 'atk', 'crit_rate', 'crit_dmg', 'pen_ratio', 'impact', 'atr_mastery', 'energy']


In [14]:
# Step 6 - Turn base_stats into a DataFrame sorted by agent name
df2 = pd.DataFrame(data=base_stats, columns=header_corrigido).sort_values(by='agent')
print(df2)

         agent    hp  def  atk crit_rate crit_dmg pen_ratio impact  \
2         Anby  7501  613  659     5.00%   50.00%        0%    136   
10       Anton  7219  623  792    19.40%   50.00%        0%     95   
1          Ben  8578  724  653     5.00%   50.00%        0%     95   
9        Billy  6907  607  787    19.40%   50.00%        0%     91   
21     Burnice  7368  601  863     5.00%   50.00%        0%     83   
20      Caesar  9526  754  712     5.00%   50.00%        0%    123   
11       Corin  6977  605  807     5.00%   78.80%        0%     93   
16       Ellen  7674  607  938    19.40%   50.00%        0%     93   
12       Grace  7483  601  826     5.00%   50.00%        0%     83   
18    Jane Doe  7789  607  881     5.00%   50.00%        0%     86   
7       Koleda  8127  595  736     5.00%   50.00%        0%    134   
3         Lucy  8026  613  659     5.00%   50.00%        0%     86   
6       Lycaon  8416  607  729     5.00%   50.00%        0%    137   
14    Nekomata  7560

In [15]:
# Step 7 - Join both DataFrames on their common column, 'agent'
# The join is an inner join, so an agent whose name is missing from (or
# spelled differently in) one of the two sources is dropped from the result.
# Report such names instead of losing them silently.
unmatched_agents = sorted(set(df_ordenado['agent']) ^ set(df2['agent']))
if unmatched_agents:
    print(f"Warning: agents found in only one source (dropped by the merge): {unmatched_agents}")

df_combined = pd.merge(df_ordenado, df2, on='agent', how='inner')
print(df_combined)

         agent attribute specialty    type                      faction  \
0         Anby  Electric      Stun   Slash                Cunning Hares   
1        Anton  Electric    Attack  Pierce     Belobog Heavy Industries   
2          Ben      Fire   Defense  Strike     Belobog Heavy Industries   
3        Billy  Physical    Attack  Pierce                Cunning Hares   
4      Burnice      Fire   Anomaly  Pierce              Sons of Calydon   
5       Caesar  Physical   Defense   Slash              Sons of Calydon   
6        Corin  Physical    Attack   Slash        Victoria Housekeeping   
7        Ellen       Ice    Attack   Slash        Victoria Housekeeping   
8        Grace  Electric   Anomaly  Pierce     Belobog Heavy Industries   
9     Jane Doe  Physical   Anomaly   Slash  Criminal Investigation Team   
10      Koleda      Fire      Stun  Strike     Belobog Heavy Industries   
11        Lucy      Fire   Support  Strike              Sons of Calydon   
12      Lycaon       Ice 

In [16]:
# Step 8 - Export the merged DataFrame to CSV (the row index is not written)
df_combined.to_csv(path_or_buf='zzz_characters_data.csv', index=False)