### Крок 1. Отримання HTML-сторінки Food

In [38]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
import re
import numpy as np

url = "https://zelda.fandom.com/wiki/Food"
html = urlopen(url).read().decode("utf-8")
soup = BeautifulSoup(html, "html.parser")

target_cols = ["Class", "Subclass", "Material", "Hearts",
               "Effect class when cooked", "Bonus effect when cooked", "Duration increase"]

heart_val = {"BotW_Heart_Icon.png": 1.0,
             "BotW_Half-Heart_Icon.png": 0.5,
             "BotW_Quarter_Heart_Icon.png": 0.25}

### Крок 2. Парсинг усіх wikitables

In [39]:
tables = soup.find_all("table", {"class": "wikitable"})
dfs = pd.read_html(str(tables))
dfs_raw = pd.read_html(str(tables))
print(f"Found tables: {len(dfs)}")

Found tables: 12


  dfs = pd.read_html(str(tables))
  dfs_raw = pd.read_html(str(tables))


### Крок 3. Уніфікація стовпців

In [None]:
out = []

for tbl, df_raw in zip(tables, dfs_raw):
    if isinstance(df_raw.columns, pd.MultiIndex):
        df_raw.columns = [" ".join(map(str, c)).strip() for c in df_raw.columns]
    else:
        df_raw.columns = df_raw.columns.str.strip()
    rename_map = {c: c for c in df_raw.columns}
    for t in target_cols:
        for c in df_raw.columns:
            if t.lower() in c.lower():
                rename_map[c] = t
    df_raw = df_raw.rename(columns=rename_map)
    if "Material" not in df_raw.columns:
        continue

    data_tr = [tr for tr in tbl.find_all("tr") if tr.find("td")]
    df_rows = [row for _, row in df_raw.iterrows()]
    min_len = min(len(data_tr), len(df_rows))
    data_tr = data_tr[:min_len]
    df_rows = df_rows[:min_len]

    table_rows = []
    for tr, raw_row in zip(data_tr, df_rows):
        tr_html = str(tr)
        hearts = sum(val for key, val in heart_val.items()
                     for _ in re.findall(re.escape(key), tr_html))
        bonus  = "max" if "BotW_Extra_Heart_Icon.png" in tr_html else np.nan

        row = raw_row[target_cols].copy()
        row["Hearts"] = hearts if hearts > 0 else np.nan
        row["Bonus effect when cooked"] = bonus
        table_rows.append(row)

    if table_rows:
        out.append(pd.DataFrame(table_rows))

if not out:
    raise ValueError("Rows which contain Material and Hearts are not being found.")
food = pd.concat(out, ignore_index=True).drop_duplicates()

### Крок 4. Запис CSV

In [41]:
food.to_csv("zelda_food.csv", index=False)
print("Shape:", food.shape)
print(food.head())
print(food.describe(include="all").T)

Shape: (69, 7)
   Class Subclass           Material  Hearts Effect class when cooked  \
0  Fruit    Fruit              Apple     1.0                      NaN   
1  Fruit    Fruit  Fleet-Lotus Seeds     1.0                    Hasty   
2  Fruit    Fruit      Hearty Durian     6.0                   Hearty   
3  Fruit    Fruit         Hydromelon     1.0                   Chilly   
4  Fruit    Fruit     Mighty Bananas     1.0                   Mighty   

  Bonus effect when cooked Duration increase  
0                      NaN              0:30  
1                      NaN               NaN  
2                      max               NaN  
3                      NaN               NaN  
4                      NaN               NaN  
                         count unique              top freq      mean  \
Class                       69      7          Seafood   18       NaN   
Subclass                    69      9  Fish and Snails   15       NaN   
Material                    69     69        