In [None]:
import os
import re
import time
from urllib.parse import unquote, urljoin, urlparse

import requests
from bs4 import BeautifulSoup

# Root of the wiki; relative hrefs scraped from pages are resolved against it with urljoin.
BASE_URL = "https://wiki.factorio.com"
# Category listing page where the crawl starts (relative to BASE_URL).
CATEGORY_URL = "/Category:Game_images"

# Local directory that downloaded images are written to; created when this cell runs if missing.
OUTPUT_DIR = "factorio_icons"
os.makedirs(OUTPUT_DIR, exist_ok=True)

def slugify(name):
    """Turn a file name into a lowercase, filesystem-friendly slug.

    Spaces become hyphens and every character that is not a word character
    or a hyphen is dropped. The stem and the extension are sanitised
    separately so the dot between them survives: "Accumulator.png" becomes
    "accumulator.png" rather than "accumulatorpng".

    Args:
        name: File name, with or without an extension.

    Returns:
        The slug as a string. If either the stem or the extension is empty
        after sanitising, the remaining part is returned without a dot.
    """
    stem, ext = os.path.splitext(name.lower().replace(' ', '-'))
    stem = re.sub(r'[^\w\-]', '', stem)
    # `ext` still carries its leading dot here; strip it with everything else
    # and re-insert a single dot below.
    ext = re.sub(r'[^\w\-]', '', ext)
    if stem and ext:
        return f"{stem}.{ext}"
    return stem + ext

def download_image_from_file_page(file_page_url, timeout=30):
    """Download the full-size image linked from a wiki "File:" page.

    The page is fetched, the first link inside the ``div.fullImageLink``
    element is taken as the image URL, and the image is saved into
    OUTPUT_DIR under a name sanitised with ``slugify``. Files that already
    exist locally are not downloaded again.

    Network and HTTP errors are reported and the file is skipped, so one bad
    page neither aborts a long crawl nor gets an error body saved as an image.

    Args:
        file_page_url: URL of the file page, absolute or relative to BASE_URL.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None.
    """
    full_file_page_url = urljoin(BASE_URL, file_page_url)
    print(f"Visiting file page: {full_file_page_url}")
    try:
        res = requests.get(full_file_page_url, timeout=timeout)
        res.raise_for_status()
    except requests.RequestException as exc:
        print(f"Failed to fetch file page: {exc}")
        return
    soup = BeautifulSoup(res.text, 'html.parser')

    image_div = soup.find('div', class_='fullImageLink')
    img_tag = image_div.find('a') if image_div else None
    if not (img_tag and img_tag.get('href')):
        print("No image found on the file page.")
        return

    image_url = urljoin(BASE_URL, img_tag['href'])
    # Name the file from the URL *path* only (no query string), and decode
    # percent-escapes so "%28research%29" does not end up as "28research29".
    filename = unquote(os.path.basename(urlparse(image_url).path))
    filename_slug = slugify(filename)
    if not filename_slug:
        print(f"Could not derive a file name from: {image_url}")
        return

    save_path = os.path.join(OUTPUT_DIR, filename_slug)
    if os.path.exists(save_path):
        print(f"Already exists: {filename_slug}")
        return

    print(f"Downloading: {image_url}")
    try:
        image_res = requests.get(image_url, timeout=timeout)
        # Without this check a 404/500 body would be written to disk as an image.
        image_res.raise_for_status()
    except requests.RequestException as exc:
        print(f"Failed to download {image_url}: {exc}")
        return

    with open(save_path, 'wb') as f:
        f.write(image_res.content)

def scrape_all_pages(start_url, timeout=30):
    """Walk a paginated category listing and download every linked file.

    Starting at ``start_url``, each listing page is fetched, every anchor
    inside ``div.mw-category-generated`` whose href starts with "/File:" is
    passed to ``download_image_from_file_page``, and the "next page" link is
    followed until there is none.

    Each distinct href is processed only once, because a listing page can
    contain more than one anchor for the same file.

    Args:
        start_url: URL of the first listing page, absolute or relative to
            BASE_URL.
        timeout: Seconds to wait for each listing-page request.

    Raises:
        requests.RequestException: If a listing page cannot be fetched; the
            crawl cannot continue without it.
    """
    next_url = start_url
    visited_pages = set()
    seen_files = set()

    while next_url:
        full_url = urljoin(BASE_URL, next_url)
        # Guard against a pagination link that points back to a page already crawled.
        if full_url in visited_pages:
            break
        visited_pages.add(full_url)

        print(f"\n[PAGE] {full_url}")
        res = requests.get(full_url, timeout=timeout)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'html.parser')

        file_links = soup.select('div.mw-category-generated a')
        if not file_links:
            print("No file links found on page.")

        for a in file_links:
            href = a.get('href')
            if not href or not href.startswith("/File:"):
                continue
            if href in seen_files:
                continue
            seen_files.add(href)
            download_image_from_file_page(href)
            # Small pause between files to stay polite to the server.
            time.sleep(0.2)

        next_link = soup.find('a', string='next page')
        next_url = next_link.get('href') if next_link else None

        

# Start the crawl at the category listing; this issues network requests and writes into OUTPUT_DIR.
scrape_all_pages(CATEGORY_URL)



[PAGE] https://wiki.factorio.com/Category:Game_images
Visiting file page: https://wiki.factorio.com/File:Accumulator.png
  ↓ Downloading: https://wiki.factorio.com/images/Accumulator.png
Visiting file page: https://wiki.factorio.com/File:Accumulator.png
  ✅ Already exists: accumulatorpng
Visiting file page: https://wiki.factorio.com/File:Acid_neutralisation.png
  ↓ Downloading: https://wiki.factorio.com/images/Acid_neutralisation.png
Visiting file page: https://wiki.factorio.com/File:Acid_neutralisation.png
  ✅ Already exists: acid_neutralisationpng
Visiting file page: https://wiki.factorio.com/File:Active_provider_chest.png
  ↓ Downloading: https://wiki.factorio.com/images/Active_provider_chest.png
Visiting file page: https://wiki.factorio.com/File:Active_provider_chest.png
  ✅ Already exists: active_provider_chestpng
Visiting file page: https://wiki.factorio.com/File:Advanced_asteroid_processing_(research).png
  ↓ Downloading: https://wiki.factorio.com/images/Advanced_asteroid_proce

In [None]:
ICON_DIR = "factorio_icons"


def restore_extensions(icon_dir):
    """Re-insert the missing dot before a trailing image extension.

    Regular files in ``icon_dir`` that have no extension but whose name ends
    in png/jpg/jpeg/gif (case-insensitive) are renamed from "<stem><ext>" to
    "<stem>.<ext>", e.g. "accumulatorpng" -> "accumulator.png".

    Files that already have an extension, sub-directories, names made up of
    the extension alone, and renames whose target already exists are left
    untouched, so running this repeatedly is safe.

    Args:
        icon_dir: Directory to scan (not recursive).

    Returns:
        A list of ``(old_name, new_name)`` tuples for the files renamed, in
        sorted order. Empty if the directory does not exist.
    """
    if not os.path.isdir(icon_dir):
        print(f"Directory not found, nothing to rename: {icon_dir}")
        return []

    # `.+?` (not `.*?`) so a file literally named "png" is not turned into ".png".
    pattern = re.compile(r"^(.+?)(png|jpg|jpeg|gif)$", re.IGNORECASE)
    renamed = []

    for filename in sorted(os.listdir(icon_dir)):
        old_path = os.path.join(icon_dir, filename)
        if not os.path.isfile(old_path):
            continue
        # Anything that already has an extension is fine as-is.
        if os.path.splitext(filename)[1]:
            continue
        match = pattern.match(filename)
        if not match:
            continue

        name, ext = match.groups()
        new_filename = f"{name}.{ext}"
        new_path = os.path.join(icon_dir, new_filename)
        # os.rename can silently replace an existing file on POSIX; never clobber.
        if os.path.exists(new_path):
            print(f"Skipped (target exists): {filename} → {new_filename}")
            continue

        os.rename(old_path, new_path)
        print(f"Renamed: {filename} → {new_filename}")
        renamed.append((filename, new_filename))

    return renamed


renamed_files = restore_extensions(ICON_DIR)

✅ Renamed: accumulatorpng → accumulator.png
✅ Renamed: acid_neutralisationpng → acid_neutralisation.png
✅ Renamed: active_provider_chestpng → active_provider_chest.png
✅ Renamed: advanced_asteroid_processing_28research29png → advanced_asteroid_processing_28research29.png
✅ Renamed: advanced_carbonic_asteroid_crushingpng → advanced_carbonic_asteroid_crushing.png
✅ Renamed: advanced_chemistry_28research29png → advanced_chemistry_28research29.png
✅ Renamed: advanced_circuitpng → advanced_circuit.png
✅ Renamed: advanced_circuit_28research29png → advanced_circuit_28research29.png
✅ Renamed: advanced_combinators_28research29png → advanced_combinators_28research29.png
✅ Renamed: advanced_electronics_28research29png → advanced_electronics_28research29.png
✅ Renamed: advanced_electronics_2_28research29png → advanced_electronics_2_28research29.png
✅ Renamed: advanced_material_processing_28research29png → advanced_material_processing_28research29.png
✅ Renamed: advanced_material_processing_2_28re