In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import json
import os
import time

# 1. Set up WebDriver (here using Chrome)
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
options.add_argument('--disable-gpu')
options.add_argument('--disable-extensions')
options.add_argument('--disable-logging')
options.add_argument('--log-level=3')
options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36')

driver = webdriver.Chrome(options=options)

In [2]:
import re

def extract_skill_ids(html_content):
    """
    Extract skill IDs from HTML content that match the pattern (XXXXX)
    Returns a list of IDs as strings without parentheses
    """
    # Pattern to match numbers in parentheses
    pattern = r'\((\d+)\)'
    
    # Find all matches and extract the numbers (without parentheses)
    skill_ids = re.findall(pattern, html_content)
    
    return skill_ids

# Example usage:
def extract_from_file(file_path):
    """
    Read HTML file and extract skill IDs
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()
    
    return extract_skill_ids(content)

In [3]:
with open("umamusume.html", 'r', encoding='utf-8') as f:
    html_content = f.read()
skill_ids = extract_skill_ids(html_content)

skill_mapping = {}
if os.path.exists('skill_mapping_remaining.json'):
    with open('skill_mapping_remaining.json', 'r', encoding='utf-8') as f:
        skill_mapping = json.load(f)
    print(f"Loaded {len(skill_mapping)} existing skill mappings")
else:
    print("No existing skill mapping file found, starting fresh")

# Filter out skill IDs that are already in the mapping
remaining_skill_ids = [skill_id for skill_id in skill_ids if skill_id not in skill_mapping]
print(f"Total skill IDs found: {len(skill_ids)}")
print(f"Already processed: {len(skill_ids) - len(remaining_skill_ids)}")
print(f"Remaining to process: {len(remaining_skill_ids)}")

Loaded 521 existing skill mappings
Total skill IDs found: 522
Already processed: 521
Remaining to process: 1


In [4]:
for i, skill_id in enumerate(remaining_skill_ids):
    try:
        print(f"Processing skill {i+1}/{len(remaining_skill_ids)}: {skill_id}")
        
        # 3. Load the Skill Condition Viewer page for this skill
        url = f"https://gametora.com/umamusume/skill-condition-viewer?skill={skill_id}"
        driver.get(url)

        # 4. Wait until the skill name span is visible
        skill_name_elem = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, "span.utils_linkcolor__rvv3k"))
        )

        # 5. Extract and display the skill name text
        skill_name = skill_name_elem.text
        print(f"Skill ID {skill_id} → Skill Name: {skill_name}")

        # 6. Add the new skill to the existing mapping
        skill_mapping[skill_id] = skill_name
        
        # Add a small delay to avoid overwhelming the server
        time.sleep(1)
        
        # Save progress every 10 skills
        if (i + 1) % 10 == 0:
            with open('skill_mapping_remaining.json', 'w', encoding='utf-8') as f:
                json.dump(skill_mapping, f, ensure_ascii=False, indent=2)
            print(f"Progress saved after {i+1} skills")
            
    except Exception as e:
        print(f"Error processing skill {skill_id}: {e}")
        # Save current progress before continuing
        with open('skill_mapping_remaining.json', 'w', encoding='utf-8') as f:
            json.dump(skill_mapping, f, ensure_ascii=False, indent=2)
        continue

Processing skill 1/1: 1000011
Skill ID 1000011 → Skill Name: Carnival Bonus (obsolete)


In [5]:
import json

# After your loop completes and you have the skill_mapping dictionary
# Save the dictionary to a JSON file
with open('remaining_skill_mapping.json', 'w', encoding='utf-8') as f:
    json.dump(skill_mapping, f, ensure_ascii=False, indent=2)

print(f"Saved {len(skill_mapping)} skill mappings to skill_mapping.json")

Saved 522 skill mappings to skill_mapping.json


In [6]:
import re
from tqdm import tqdm
import json

def replace_skill_names_in_html(html_content, skill_mapping):
    """
    Replace Japanese skill names in HTML with corresponding names from skill mapping
    based on skill IDs found in parentheses
    """
    # Pattern to match the entire div structure with skill name and ID
    pattern = r'(<div><strong><span class="svelte-16f7yj7">)(.*?)(</span></strong>\s*\((\d+)\)</div>)'
    
    # Find all matches
    matches = list(re.finditer(pattern, html_content, re.DOTALL))
    
    if not matches:
        print("No skill patterns found in HTML")
        return html_content
    
    print(f"Found {len(matches)} skill patterns to process")
    
    # Process matches in reverse order to maintain string positions
    modified_html = html_content
    replacements_made = 0
    
    for match in tqdm(reversed(matches), total=len(matches), desc="Replacing skill names"):
        opening_tag = match.group(1)
        current_name = match.group(2)
        closing_part = match.group(3)
        skill_id = match.group(4)
        
        # Check if we have a replacement for this skill ID
        if skill_id in skill_mapping:
            new_name = skill_mapping[skill_id]
            # Replace the entire match with the new name
            new_content = opening_tag + new_name + closing_part
            modified_html = modified_html[:match.start()] + new_content + modified_html[match.end():]
            replacements_made += 1
            print(f"  Replaced '{current_name}' with '{new_name}' for ID {skill_id}")
        else:
            print(f"  No replacement found for skill ID {skill_id} ('{current_name}')")
    
    print(f"\nTotal replacements made: {replacements_made}/{len(matches)}")
    return modified_html

# Load the skill mapping from JSON
try:
    with open('remaining_skill_mapping.json', 'r', encoding='utf-8') as f:
        replacement_mapping = json.load(f)
    print(f"Loaded {len(replacement_mapping)} skill name mappings")
except FileNotFoundError:
    print("remaining_skill_mapping.json not found. Please run the scraping script first.")
    replacement_mapping = {}

# Load the HTML file
try:
    with open("umamusume.html", 'r', encoding='utf-8') as f:
        original_html = f.read()
    print("Loaded HTML file successfully")
except FileNotFoundError:
    print("umamusume.html not found")
    original_html = ""

# Replace skill names if we have both files
if replacement_mapping and original_html:
    modified_html = replace_skill_names_in_html(original_html, replacement_mapping)
    
    # Save the modified HTML to a new file
    with open("umamusume_eng.html", 'w', encoding='utf-8') as f:
        f.write(modified_html)
    
    print("Modified HTML saved to 'umamusume_translated.html'")
else:
    print("Cannot proceed - missing required files")

Loaded 522 skill name mappings
Loaded HTML file successfully
Found 522 skill patterns to process


Replacing skill names:  25%|██▌       | 132/522 [00:00<00:00, 651.36it/s]

  Replaced 'カーニバルボーナス' with 'Carnival Bonus (obsolete)' for ID 1000011
  Replaced '禾スナハチ登ル' with 'Bountiful Harvest' for ID 910561
  Replaced '113転び114起き' with 'Fall Down 113 Times, Get Up 114' for ID 910521
  Replaced 'ぐるぐるマミートリック♡' with 'Round and Round, Mummy Trick♡' for ID 910451
  Replaced 'GET DOWN' with 'GET DOWN' for ID 910401
  Replaced 'Guten Appetit♪' with 'Guten Appetit♪' for ID 910371
  Replaced 'Drain for rose' with 'Drain for Rose' for ID 910301
  Replaced 'オペレーション・Cacao' with 'Operation Cacao' for ID 910261
  Replaced 'フラワリー☆マニューバ' with 'Flowery☆Maneuver' for ID 910241
  Replaced 'Presents from X' with 'Presents from X' for ID 910231
  Replaced '薫風、永遠なる瞬間を' with 'Eternal Moments' for ID 910181
  Replaced '翳り退く、さざめきの矢' with 'Howling Arrow Piercing the Darkness' for ID 910171
  Replaced '恵福バルカローレ' with 'Blessed Barcarolle' for ID 910151
  Replaced 'コンドル猛撃波' with 'Condor's Fury' for ID 910141
  Replaced '最強の名を懸けて' with 'Legacy of the Strong' for ID 910131
  Replaced 'ゲインヒー

Replacing skill names:  52%|█████▏    | 271/522 [00:00<00:00, 665.09it/s]

  Replaced '追込直線◎' with 'End Closer Straightaways ◎' for ID 201451
  Replaced '鋭い眼光' with 'Sharp Gaze' for ID 201442
  Replaced '八方にらみ' with 'All-Seeing Eyes' for ID 201441
  Replaced '読解力' with 'Studious' for ID 201432
  Replaced '大局観' with 'The Bigger Picture' for ID 201431
  Replaced '小休憩' with 'A Small Breather' for ID 201422
  Replaced 'リラックス' with 'Relax' for ID 201421
  Replaced '十万バリキ' with '1,500,000 CC' for ID 201412
  Replaced '百万バリキ' with '15,000,000 CC' for ID 201411
  Replaced 'がんばり屋' with 'Fighter' for ID 201402
  Replaced '努力家' with 'Hard Worker' for ID 201401
  Replaced '差しコーナー○' with 'Late Surger Corners ○' for ID 201392
  Replaced '差しコーナー◎' with 'Later Surger Corners ◎' for ID 201391
  Replaced '差し直線○' with 'Late Surger Straightaways ○' for ID 201382
  Replaced '差し直線◎' with 'Late Surger Straightaways ◎' for ID 201381
  Replaced 'かく乱' with 'Disorient' for ID 201372
  Replaced '幻惑のかく乱' with 'Dazzling Disorientation' for ID 201371
  Replaced 'まき直し' with 'Tactical Tweak'

Replacing skill names:  99%|█████████▉| 519/522 [00:00<00:00, 799.48it/s]

  Replaced '直線加速' with 'Straightaway Acceleration' for ID 200372
  Replaced '一陣の風' with 'Rushing Gale!' for ID 200371
  Replaced '直線巧者' with 'Straightaway Adept' for ID 200362
  Replaced 'ハヤテ一文字' with 'Beeline Burst' for ID 200361
  Replaced 'コーナー回復×' with 'Corner Recovery ×' for ID 200353
  Replaced 'コーナー回復○' with 'Corner Recovery ○' for ID 200352
  Replaced '円弧のマエストロ' with 'Swinging Maestro' for ID 200351
  Replaced 'コーナー加速×' with 'Corner Acceleration ×' for ID 200343
  Replaced 'コーナー加速○' with 'Corner Acceleration ○' for ID 200342
  Replaced '曲線のソムリエ' with 'Corner Connoisseur' for ID 200341
  Replaced 'コーナー巧者×' with 'Corner Adept ×' for ID 200333
  Replaced 'コーナー巧者○' with 'Corner Adept ○' for ID 200332
  Replaced '弧線のプロフェッサー' with 'Professor of Curvature' for ID 200331
  Replaced '小心者' with 'Paddock Fright' for ID 200321
  Replaced 'GⅠ苦手' with 'G1 Averseness' for ID 200311
  Replaced '伏兵○' with 'Long Shot ○' for ID 200302
  Replaced '伏兵◎' with 'Long Shot ◎' for ID 200301
  Replaced '

Replacing skill names: 100%|██████████| 522/522 [00:00<00:00, 725.65it/s]


Total replacements made: 522/522
Modified HTML saved to 'umamusume_translated.html'



