In [1]:
from bs4 import BeautifulSoup
import pandas as pd
import requests
import re
import ast

In [2]:
## Fetch and parse the character index page
url = "https://gensh.honeyhunterworld.com/"
# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() stops on an HTTP error instead of parsing an error page.
response = requests.get(url+"fam_chars/?lang=EN", timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

In [None]:
# Locate the inline <script> whose text contains the sortable_data.push(...) call.
script_tag = soup.find("script", string=re.compile(r"sortable_data\.push"))
if script_tag is None:
    # Fail at the cause: the next cell iterates table_data, so carrying on would
    # end in a NameError on a fresh kernel or silently reuse a stale value.
    raise RuntimeError("Не найден скрипт")

script_text = script_tag.string

# Capture the nested-list literal passed to push(); DOTALL lets it span lines.
match = re.search(
    r"sortable_data\.push\(\s*(\[\[.*?\]\])\s*\);", script_text, re.DOTALL)
if match is None:
    raise RuntimeError("Не удалось извлечь данные.")

# literal_eval only accepts Python literals, so the payload is parsed, not executed.
table_data = ast.literal_eval(match.group(1))

In [4]:
# Build one record (dict) per character by scraping that character's detail page.
data = []
# Entries whose name contains any of these substrings are skipped.
stop_words = "Test", "Trial"
# Row labels whose value is copied as text, keyed by the lower-cased label.
text_fields = ("Name", "Association", "Month of Birth", "Weapon", "Element")

# One Session is shared by all per-character requests to the same host.
with requests.Session() as session:
    for character in table_data:
        # The first column is an HTML snippet; the name is the image's alt text.
        string = BeautifulSoup(character[0], "html.parser")
        char_name = string.find("img").get('alt')

        if any(stop in char_name for stop in stop_words):
            continue

        # Strip every slash and backslash from the href so that only the
        # remaining page slug is appended to url.
        char_link = url+string.find("a").get("href").replace("/", "").replace("\\", "")
        char_page = session.get(char_link, timeout=30)
        char_page.raise_for_status()
        char_page_soup = BeautifulSoup(char_page.text, "html.parser")

        main_table = char_page_soup.find("table", class_="genshin_table main_table")
        if main_table is None:
            # Name the offending page instead of failing with an AttributeError on None.
            raise RuntimeError(f"No main table found on {char_link}")

        record = {}
        for row in main_table.find_all("tr"):
            cells = row.find_all("td")
            for cell in cells:
                if cell.text in text_fields:
                    # The value is the row's last cell, minus leading non-breaking spaces.
                    record[cell.text.lower()] = cells[-1].text.lstrip("\xa0")
                elif cell.text == "Rarity":
                    # Rarity is the number of images in the value cell.
                    record["rarity"] = len(cells[-1].find_all("img"))

        # Append only once the page has been parsed, so a failure never leaves
        # an empty placeholder record behind.
        data.append(record)

In [5]:
# Pull the Traveler record out of the roster as a template and normalise associations.
# The Traveler is found by name rather than by a fixed list position.
traveler_rows = [char for char in data if char.get("name") == "Traveler"]
if not traveler_rows:
    raise LookupError(
        "No 'Traveler' record in data; re-run the scraping cell before this one.")

# Template for the per-element Traveler entries: a copy with no association.
# NOTE(review): if several Traveler records exist, the first one is used; confirm they are interchangeable.
traveler = {**traveler_rows[0], "association": None}

# Lower-cased association -> replacement value.
association_fixes = {"fatui": "SNEZHNAYA", "ranger": None}

# Filter into a new list and assign it back in place: deleting items while
# enumerating the same list would skip the element after every deletion.
data[:] = [char for char in data if char.get("name") != "Traveler"]
for char in data:
    # A missing or None association is left as is, so re-running this cell is safe.
    association = (char.get("association") or "").lower()
    if association in association_fixes:
        char["association"] = association_fixes[association]

In [6]:
# Index the scraped records by character name, then display the mapping.
data_dict = dict((entry['name'], entry) for entry in data)
data_dict

{'Kamisato Ayaka': {'name': 'Kamisato Ayaka',
  'association': 'INAZUMA',
  'rarity': 5,
  'weapon': 'Sword',
  'element': 'Cryo',
  'month of birth': '9'},
 'Jean': {'name': 'Jean',
  'association': 'MONDSTADT',
  'rarity': 5,
  'weapon': 'Sword',
  'element': 'Anemo',
  'month of birth': '3'},
 'Lisa': {'name': 'Lisa',
  'association': 'MONDSTADT',
  'rarity': 4,
  'weapon': 'Catalyst',
  'element': 'Electro',
  'month of birth': '6'},
 'Barbara': {'name': 'Barbara',
  'association': 'MONDSTADT',
  'rarity': 4,
  'weapon': 'Catalyst',
  'element': 'Hydro',
  'month of birth': '7'},
 'Kaeya': {'name': 'Kaeya',
  'association': 'MONDSTADT',
  'rarity': 4,
  'weapon': 'Sword',
  'element': 'Cryo',
  'month of birth': '11'},
 'Diluc': {'name': 'Diluc',
  'association': 'MONDSTADT',
  'rarity': 5,
  'weapon': 'Claymore',
  'element': 'Pyro',
  'month of birth': '4'},
 'Razor': {'name': 'Razor',
  'association': 'MONDSTADT',
  'rarity': 4,
  'weapon': 'Claymore',
  'element': 'Electro',
  

In [7]:
# Fetch and parse the tier list page used for each character's rating and role
tier_list_url = "https://genshin.gg/tier-list/"
# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() stops on an HTTP error instead of parsing an error page.
responce = requests.get(tier_list_url, timeout=30)
responce.raise_for_status()
tier_list_soup = BeautifulSoup(responce.text, "html.parser")

In [8]:
# Merge role and power rating from the tier list into data_dict.
# Each "dropzone-row" div yields one rating plus the characters listed under it.
zones = tier_list_soup.find_all("div", class_="dropzone-row")

# data_dict key -> the name the tier list uses for the same character.
alter_names = {
    "Tartaglia": "Childe"}
# Reverse view so a tier-list name resolves to its data_dict key in one lookup.
tier_aliases = {tier_name: key for key, tier_name in alter_names.items()}

for zone in zones:
    # The rating is the text of the div whose class reads "dropzone-title <LETTER>".
    rate = zone.find("div", class_=re.compile(r"^dropzone-title [A-Z]$")).text
    zone_chars = zone.find("div", "dropzone-characters --mobile d-md-none").find_all("a")

    for char in zone_chars:
        name = char.find("h2", "tierlist-name").text
        role = char.find("h3", "tierlist-role").text

        if "Traveler" in name:
            # Handled before any key lookup so data_dict is never modified while
            # being iterated, and so it also works when data_dict is empty.
            # Clone the template; the element is the alt text of the entry's last image.
            elem = char.find("div").find_all("img")[-1].get("alt")
            data_dict[name] = traveler.copy()
            data_dict[name]["name"] = name
            data_dict[name]["role"] = role
            data_dict[name]["power_rating"] = rate
            data_dict[name]["element"] = elem
            continue

        # Resolve the key in order of confidence: exact name, known alias, then
        # the first key containing the tier-list name (presumably a shortened
        # form of the full name; verify against the site).
        if name in data_dict:
            key = name
        elif name in tier_aliases and tier_aliases[name] in data_dict:
            key = tier_aliases[name]
        else:
            key = next((candidate for candidate in data_dict if name in candidate), None)

        # Tier-list entries with no matching record are ignored.
        if key is not None:
            data_dict[key]["role"] = role
            data_dict[key]["power_rating"] = rate

In [9]:
# Display the records after the tier-list merge (role and power_rating are set where a match was found).
data_dict

{'Kamisato Ayaka': {'name': 'Kamisato Ayaka',
  'association': 'INAZUMA',
  'rarity': 5,
  'weapon': 'Sword',
  'element': 'Cryo',
  'month of birth': '9',
  'role': 'Main DPS',
  'power_rating': 'B'},
 'Jean': {'name': 'Jean',
  'association': 'MONDSTADT',
  'rarity': 5,
  'weapon': 'Sword',
  'element': 'Anemo',
  'month of birth': '3',
  'role': 'Support',
  'power_rating': 'B'},
 'Lisa': {'name': 'Lisa',
  'association': 'MONDSTADT',
  'rarity': 4,
  'weapon': 'Catalyst',
  'element': 'Electro',
  'month of birth': '6',
  'role': 'Support',
  'power_rating': 'C'},
 'Barbara': {'name': 'Barbara',
  'association': 'MONDSTADT',
  'rarity': 4,
  'weapon': 'Catalyst',
  'element': 'Hydro',
  'month of birth': '7',
  'role': 'Support',
  'power_rating': 'C'},
 'Kaeya': {'name': 'Kaeya',
  'association': 'MONDSTADT',
  'rarity': 4,
  'weapon': 'Sword',
  'element': 'Cryo',
  'month of birth': '11',
  'role': 'Sub DPS',
  'power_rating': 'C'},
 'Diluc': {'name': 'Diluc',
  'association': '

In [10]:
# One row per character: transpose the dict-of-dicts frame and replace the
# name index with a fresh integer index.
df = pd.DataFrame(data_dict).T.reset_index(drop=True)
# Show the rows that have at least one missing field.
df[df.isna().any(axis="columns")]

Unnamed: 0,name,association,rarity,weapon,element,month of birth,role,power_rating
46,Aloy,,4,Bow,Cryo,4,Sub DPS,D
96,Escoffier,FONTAINE,5,Polearm,Cryo,6,,
97,Ifa,NATLAN,4,Catalyst,Anemo,3,,
98,Skirk,SCOURGE,5,Sword,Cryo,1,,
99,Dahlia,MONDSTADT,4,Sword,Hydro,1,,
100,Traveler (Dendro),,5,Sword,Dendro,0,Support,C
101,Traveler (Pyro),,5,Sword,Pyro,0,Support,C
102,Traveler (Anemo),,5,Sword,Anemo,0,Sub DPS,D
103,Traveler (Electro),,5,Sword,Electro,0,Support,D
104,Traveler (Geo),,5,Sword,Geo,0,Sub DPS,D


In [11]:
# Rows with at least one missing field.
df.loc[df.isna().any(axis="columns")]

Unnamed: 0,name,association,rarity,weapon,element,month of birth,role,power_rating
46,Aloy,,4,Bow,Cryo,4,Sub DPS,D
96,Escoffier,FONTAINE,5,Polearm,Cryo,6,,
97,Ifa,NATLAN,4,Catalyst,Anemo,3,,
98,Skirk,SCOURGE,5,Sword,Cryo,1,,
99,Dahlia,MONDSTADT,4,Sword,Hydro,1,,
100,Traveler (Dendro),,5,Sword,Dendro,0,Support,C
101,Traveler (Pyro),,5,Sword,Pyro,0,Support,C
102,Traveler (Anemo),,5,Sword,Anemo,0,Sub DPS,D
103,Traveler (Electro),,5,Sword,Electro,0,Support,D
104,Traveler (Geo),,5,Sword,Geo,0,Sub DPS,D


In [13]:
# Write the final table to disk; with pandas' default settings the index is
# written as the first column.
df.to_csv(path_or_buf="genshin_data.csv")